3
3
import java .io .ByteArrayOutputStream ;
4
4
import java .io .StringWriter ;
5
5
import java .io .Writer ;
6
+ import java .nio .charset .StandardCharsets ;
6
7
7
8
import org .junit .jupiter .api .Test ;
8
9
@@ -17,6 +18,10 @@ class SurrogateWrite223Test extends JUnit5TestBase
17
18
{
18
19
private final JsonFactory DEFAULT_JSON_F = newStreamFactory (); // NOTE(review): newStreamFactory() is not defined in the visible code; presumably inherited from JUnit5TestBase
19
20
21
+ private final JsonFactory SURROGATE_COMBINING_JSON_F = JsonFactory .builder () // factory used by the tests that need surrogate pairs combined on output
22
+ .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 ) // the one write feature these tests turn on
23
+ .build (); // NOTE(review): checkSurrogateWithCharacterEscapes calls setCharacterEscapes() on this instance; that stays isolated only while the field is non-static and JUnit creates one test instance per method (its default) - confirm before making it static
24
+
20
25
// for [core#223]
21
26
@ Test
22
27
void surrogatesDefaultSetting () throws Exception {
@@ -35,9 +40,7 @@ void surrogatesByteBacked() throws Exception
35
40
36
41
out = new ByteArrayOutputStream ();
37
42
38
- JsonFactory f = JsonFactory .builder ()
39
- .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 )
40
- .build ();
43
+ JsonFactory f = SURROGATE_COMBINING_JSON_F ;
41
44
g = f .createGenerator (out );
42
45
g .writeStartArray ();
43
46
g .writeString (toQuote );
@@ -96,9 +99,7 @@ void surrogatesCharBacked() throws Exception
96
99
//https://github.com/FasterXML/jackson-core/issues/1359
97
100
@ Test
98
101
void checkNonSurrogates () throws Exception {
99
- JsonFactory f = JsonFactory .builder ()
100
- .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 )
101
- .build ();
102
+ JsonFactory f = SURROGATE_COMBINING_JSON_F ;
102
103
ByteArrayOutputStream out = new ByteArrayOutputStream ();
103
104
try (JsonGenerator gen = f .createGenerator (out )) {
104
105
gen .writeStartObject ();
@@ -126,9 +127,7 @@ void checkNonSurrogates() throws Exception {
126
127
127
128
@ Test
128
129
void checkSurrogateWithCharacterEscapes () throws Exception {
129
- JsonFactory f = JsonFactory .builder ()
130
- .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 )
131
- .build ();
130
+ JsonFactory f = SURROGATE_COMBINING_JSON_F ;
132
131
f .setCharacterEscapes (JsonpCharacterEscapes .instance ());
133
132
ByteArrayOutputStream out = new ByteArrayOutputStream ();
134
133
try (JsonGenerator gen = f .createGenerator (out )) {
@@ -140,4 +139,37 @@ void checkSurrogateWithCharacterEscapes() throws Exception {
140
139
String json = out .toString ("UTF-8" );
141
140
assertEquals ("{\" test_emoji\" :\" \uD83D \uDE0A \" }" , json );
142
141
}
142
+
143
+ //https://github.com/FasterXML/jackson-core/issues/1473
144
+ @ Test
145
+ void surrogateCharSplitInTwoSegments () throws Exception
146
+ {
147
+ // Regression check: UTF8JsonGenerator must not split a surrogate pair
148
+ // across two separately written segments. The split leading into the third
149
+ // segment is targeted so that index and offset handling is exercised too.
150
+ // Per the original author's note, segments are cut every 1000 chars by default.
151
+ // The value is therefore 1999 filler chars followed by the pair: length 2001,
152
+ // with the two surrogates as chars 2000 and 2001 (1-based).
153
+ int count = 1999 ; // number of 'x' filler chars placed before the surrogate pair
154
+ char [] chars = new char [count ];
155
+ java .util .Arrays .fill (chars , 'x' );
156
+ String base = new String (chars );
157
+
158
+ final String VALUE = base + "\uD83E \uDEE1 " ;
159
+
160
+ ByteArrayOutputStream bb = new ByteArrayOutputStream ();
161
+ try (JsonGenerator g = SURROGATE_COMBINING_JSON_F .createGenerator (bb )) {
162
+ g .enable (JsonGenerator .Feature .COMBINE_UNICODE_SURROGATES_IN_UTF8 ); // NOTE(review): the factory is already built with the JsonWriteFeature of the same name, so this call looks redundant - confirm
163
+
164
+ g .writeStartArray ();
165
+ g .writeString (VALUE );
166
+ g .writeEndArray ();
167
+ }
168
+
169
+ String result = new String (bb .toByteArray (), StandardCharsets .UTF_8 );
170
+
171
+ // Expected output is ["xxxx....🫡"]: count+2 skips the leading [" and the filler chars,
172
+ // length()-2 drops the trailing "], leaving only the pair "\uD83E\uDEE1", which must come back intact
173
+ assertEquals ("\uD83E \uDEE1 " , result .substring (count +2 , result .length ()-2 ));
174
+ }
143
175
}
0 commit comments