Skip to content

Commit

Permalink
Fix #91: handle split-buffer surrogate for comments as well
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Jun 7, 2024
1 parent 0f6af6d commit 46b7efc
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 12 deletions.
4 changes: 3 additions & 1 deletion release-notes/VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ Project: aalto-xml

1.3.3 (not yet released)

#86: Multi-byte characters are split in writeCData() if first byte sits
#86: Multi-byte characters are split in `writeCData()` if first byte sits
right at the end of the buffer
(reported, fix contributed by @tatsel)
#90: Update stax2-api dep to 4.2.2 (from 4.2)
#91: Multi-byte characters are split in `writeComment()` if first byte sits
right at the end of the buffer

1.3.2 (25-Apr-2022)

Expand Down
19 changes: 13 additions & 6 deletions src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ protected int writeCDataContents(char[] cbuf, int offset, int len)
++offset;
--len;
}

// Unlike with writeCharacters() and fastWriteName(), let's not
// worry about split buffers here: this is unlikely to become
// performance bottleneck. This allows keeping it simple; and
Expand Down Expand Up @@ -1197,12 +1198,18 @@ public int writeComment(String data) throws IOException, XMLStreamException
protected int writeCommentContents(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
{
/* Unlike with writeCharacters() and fastWriteName(), let's not
* worry about split buffers here: this is unlikely to become
* performance bottleneck. This allows keeping it simple; and
* should it matter, we could start doing fast version here
* as well.
*/
if (_surrogate != 0) {
outputSurrogates(_surrogate, cbuf[offset]);
// reset the temporary surrogate storage
_surrogate = 0;
++offset;
--len;
}

// Unlike with writeCharacters() and fastWriteName(), let's not
// worry about split buffers here: this is unlikely to become
// a performance bottleneck. This allows keeping it simple; and
// should it matter, we could start doing a fast version here as well.
len += offset; // now marks the end

main_loop:
Expand Down
29 changes: 24 additions & 5 deletions src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,7 @@ public void testSplitSurrogateWithAttributeValue2() throws Exception

public void testSplitSurrogateWithCData() throws Exception
{
// This test aims to produce the
// javax.xml.stream.XMLStreamException: Incomplete surrogate pair in content: first char 0xdfce, second 0x78
// error message. The issue was similar to the one described in testSurrogateMemory1(), except it happened in
// ByteXmlWriter#writeCDataContents(), where check for existing _surrogate was missing prior to the fix,
// as opposed to ByteXmlWriter#writeCharacters().
// Modification of "testSplitSurrogateWithAttributeValue()" but for CDATA
StringBuilder testText = new StringBuilder();
for (int i = 0; i < 511; i++) {
testText.append('x');
Expand All @@ -83,4 +79,27 @@ public void testSplitSurrogateWithCData() throws Exception
writer.writeEndTag(writer.constructName("testelement"));
writer.close(false);
}


/**
 * Regression test for issue #91: a surrogate pair inside comment content
 * must survive being split across two internal buffer chunks.
 * Modification of "testSplitSurrogateWithAttributeValue()" but for Comment.
 *
 * @throws Exception if the writer rejects the content or the output does
 *     not contain the comment text intact
 */
public void testSplitSurrogateWithComment() throws Exception
{
    // 511 filler chars put the high surrogate at index 511 and the low
    // surrogate at index 512; presumably this straddles the writer's
    // internal copy buffer (assumes a 512-char chunk size -- TODO confirm
    // against ByteXmlWriter).
    StringBuilder testText = new StringBuilder();
    for (int i = 0; i < 511; i++) {
        testText.append('x');
    }
    testText.append("\uD835\uDFCE");
    for (int i = 0; i < 512; i++) {
        testText.append('x');
    }
    final String commentText = testText.toString();

    WriterConfig writerConfig = new WriterConfig();
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
    writer.writeStartTagStart(writer.constructName("testelement"));
    // Finish the start tag first, so that the comment is emitted as element
    // content rather than inside a still-open start tag.
    writer.writeStartTagEnd();
    writer.writeComment(commentText);
    writer.writeEndTag(writer.constructName("testelement"));
    writer.close(false);

    // Not throwing is not enough: also check that the pair was encoded and
    // written out unchanged (assumes close() flushes buffered output to the
    // underlying stream -- TODO confirm).
    String xml = byteArrayOutputStream.toString("UTF-8");
    if (!xml.contains(commentText)) {
        throw new AssertionError("Comment content (including split surrogate pair)"
                + " not found intact in output of length " + xml.length());
    }
}
}

0 comments on commit 46b7efc

Please sign in to comment.