Skip to content

Commit 2e2ec82

Browse files
authored
Merge pull request #710 from metafacture/encodeMarcXmlEscapeUnicode
Optionally escape Unicode in MarcXmlEncoder.
2 parents bcef668 + 94dcbab commit 2e2ec82

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlEncoder.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,15 @@ public void setFormatted(final boolean formatted) {
185185
encoder.setFormatted(formatted);
186186
}
187187

188+
/**
189+
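* Flags whether to escape Unicode as numeric character references (e.g. {@code &#1048576;}) instead of writing it verbatim; off by default.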
190+
*
191+
* @param escapeUnicode true if Unicode should be escaped
192+
*/
193+
public void setEscapeUnicode(final boolean escapeUnicode) {
194+
encoder.setEscapeUnicode(escapeUnicode);
195+
}
196+
188197
@Override
189198
public void startRecord(final String identifier) {
190199
pipe.startRecord(identifier);
@@ -242,6 +251,7 @@ private static class Encoder extends DefaultStreamPipe<ObjectReceiver<String>> {
242251
private Object[] namespacePrefix = new Object[]{NAMESPACE_PREFIX};
243252

244253
private int indentationLevel;
254+
private boolean escapeUnicode;
245255
private boolean formatted = PRETTY_PRINTED;
246256
private int recordAttributeOffset;
247257
private int recordLeaderOffset;
@@ -270,6 +280,10 @@ public void setFormatted(final boolean formatted) {
270280
this.formatted = formatted;
271281
}
272282

283+
public void setEscapeUnicode(final boolean escapeUnicode) {
284+
this.escapeUnicode = escapeUnicode;
285+
}
286+
273287
@Override
274288
public void startRecord(final String identifier) {
275289
if (atStreamStart) {
@@ -434,7 +448,7 @@ private boolean appendLeader(final String name, final String value) {
434448
* @param str the unescaped sequence to be written
435449
*/
436450
private void writeEscaped(final String str) {
437-
builder.append(XmlUtil.escape(str, false));
451+
builder.append(XmlUtil.escape(str, escapeUnicode));
438452
}
439453

440454
private void writeLeader() {

metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlEncoderTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ public class MarcXmlEncoderTest {
4646
private static final String XML_MARC_COLLECTION_END_TAG = "</marc:collection>";
4747
private static final String RECORD_ID = "92005291";
4848

49+
private static final int UNICODE_CODE_POINT = 1048576;
50+
private static final String UNICODE_STRING = Character.toString(UNICODE_CODE_POINT);
51+
4952
private StringBuilder resultCollector;
5053
private int resultCollectorsResetStreamCount;
5154
private MarcXmlEncoder encoder;
@@ -171,6 +174,33 @@ public void createARecordWithEscapedSequences() {
171174
Assert.assertEquals(expected, actual);
172175
}
173176

177+
@Test
178+
public void createARecordWithoutEscapedUnicode() {
179+
encoder.startRecord(RECORD_ID);
180+
encoder.literal("001", UNICODE_STRING);
181+
encoder.endRecord();
182+
encoder.onResetStream();
183+
final String expected = XML_DECLARATION + XML_ROOT_OPEN + "<marc:record>" +
184+
"<marc:controlfield tag=\"001\">" + UNICODE_STRING + "</marc:controlfield>" + "</marc:record>" +
185+
XML_MARC_COLLECTION_END_TAG;
186+
final String actual = resultCollector.toString();
187+
Assert.assertEquals(expected, actual);
188+
}
189+
190+
@Test
191+
public void createARecordWithEscapedUnicode() {
192+
encoder.setEscapeUnicode(true);
193+
encoder.startRecord(RECORD_ID);
194+
encoder.literal("001", UNICODE_STRING);
195+
encoder.endRecord();
196+
encoder.onResetStream();
197+
final String expected = XML_DECLARATION + XML_ROOT_OPEN + "<marc:record>" +
198+
"<marc:controlfield tag=\"001\">&#" + UNICODE_CODE_POINT + ";</marc:controlfield>" + "</marc:record>" +
199+
XML_MARC_COLLECTION_END_TAG;
200+
final String actual = resultCollector.toString();
201+
Assert.assertEquals(expected, actual);
202+
}
203+
174204
@Test
175205
public void createTwoRecordsInOneCollection() {
176206
addOneRecord();

0 commit comments

Comments
 (0)