diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java index 3113a3243..402ba1e15 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java @@ -185,6 +185,15 @@ public void setFormatted(final boolean formatted) { encoder.setFormatted(formatted); } + /** + * Flags whether to escape Unicode. + * + * @param escapeUnicode true if Unicode should be escaped + */ + public void setEscapeUnicode(final boolean escapeUnicode) { + encoder.setEscapeUnicode(escapeUnicode); + } + @Override public void startRecord(final String identifier) { pipe.startRecord(identifier); @@ -242,6 +251,7 @@ private static class Encoder extends DefaultStreamPipe> { private Object[] namespacePrefix = new Object[]{NAMESPACE_PREFIX}; private int indentationLevel; + private boolean escapeUnicode; private boolean formatted = PRETTY_PRINTED; private int recordAttributeOffset; private int recordLeaderOffset; @@ -270,6 +280,10 @@ public void setFormatted(final boolean formatted) { this.formatted = formatted; } + public void setEscapeUnicode(final boolean escapeUnicode) { + this.escapeUnicode = escapeUnicode; + } + @Override public void startRecord(final String identifier) { if (atStreamStart) { @@ -434,7 +448,7 @@ private boolean appendLeader(final String name, final String value) { * @param str the unescaped sequence to be written */ private void writeEscaped(final String str) { - builder.append(XmlUtil.escape(str, false)); + builder.append(XmlUtil.escape(str, escapeUnicode)); } private void writeLeader() { diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java index a82acca03..baa64c020 100644 --- a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java @@ -46,6 +46,9 @@ public class MarcXmlEncoderTest { private static final String XML_MARC_COLLECTION_END_TAG = ""; private static final String RECORD_ID = "92005291"; + private static final int UNICODE_CODE_POINT = 1048576; + private static final String UNICODE_STRING = Character.toString(UNICODE_CODE_POINT); + private StringBuilder resultCollector; private int resultCollectorsResetStreamCount; private MarcXmlEncoder encoder; @@ -171,6 +174,33 @@ public void createARecordWithEscapedSequences() { Assert.assertEquals(expected, actual); } + @Test + public void createARecordWithoutEscapedUnicode() { + encoder.startRecord(RECORD_ID); + encoder.literal("001", UNICODE_STRING); + encoder.endRecord(); + encoder.onResetStream(); + final String expected = XML_DECLARATION + XML_ROOT_OPEN + "" + + "\000" + "" + + XML_MARC_COLLECTION_END_TAG; + final String actual = resultCollector.toString(); + Assert.assertEquals(expected, actual); + } + + @Test + public void createARecordWithEscapedUnicode() { + encoder.setEscapeUnicode(true); + encoder.startRecord(RECORD_ID); + encoder.literal("001", UNICODE_STRING); + encoder.endRecord(); + encoder.onResetStream(); + final String expected = XML_DECLARATION + XML_ROOT_OPEN + "" + + "&#" + UNICODE_CODE_POINT + ";" + "" + + XML_MARC_COLLECTION_END_TAG; + final String actual = resultCollector.toString(); + Assert.assertEquals(expected, actual); + } + @Test public void createTwoRecordsInOneCollection() { addOneRecord();