From 49dfd6a9c193d4d5d98921c961a6a3b0ca871bfa Mon Sep 17 00:00:00 2001 From: Claude Mamo <823038+claudemamo@users.noreply.github.com> Date: Mon, 26 Apr 2021 02:25:20 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20retain=20attribute=20entity=20references?= =?UTF-8?q?=20instead=20of=20choking=20when=20XMLInpu=E2=80=A6=20(#74)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement #65: Add AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES which allows skipping handling of general entity references in attributes. --- .../fasterxml/aalto/AaltoInputProperties.java | 16 ++++++++ .../com/fasterxml/aalto/in/ReaderConfig.java | 16 ++++++-- .../com/fasterxml/aalto/in/ReaderScanner.java | 28 +++++++------ .../aalto/sax/SAXParserFactoryImpl.java | 8 +++- .../fasterxml/aalto/sax/SAXParserImpl.java | 3 ++ .../java/com/fasterxml/aalto/sax/SAXUtil.java | 2 - .../aalto/sax/TestEntityResolver.java | 41 +++++++++++++++++++ .../aalto/sax/TestSAXParserFactoryImpl.java | 17 ++++++++ 8 files changed, 111 insertions(+), 20 deletions(-) create mode 100644 src/main/java/com/fasterxml/aalto/AaltoInputProperties.java create mode 100644 src/test/java/com/fasterxml/aalto/sax/TestSAXParserFactoryImpl.java diff --git a/src/main/java/com/fasterxml/aalto/AaltoInputProperties.java b/src/main/java/com/fasterxml/aalto/AaltoInputProperties.java new file mode 100644 index 0000000..cfa6a68 --- /dev/null +++ b/src/main/java/com/fasterxml/aalto/AaltoInputProperties.java @@ -0,0 +1,16 @@ +package com.fasterxml.aalto; + +/** + * Class that contains constant for property names used to configure + * cursor and event readers produced by Aalto implementation of + * {@link javax.xml.stream.XMLInputFactory}. + */ +public final class AaltoInputProperties { + + /** + * Feature controlling whether general entities in attributes are retained. + * + * @since 1.3 + */ + public final static String P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES = "com.fasterxml.aalto.retainAttributeGeneralEntities"; +} diff --git a/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java b/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java index eddbc7f..538052c 100644 --- a/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java +++ b/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java @@ -5,6 +5,7 @@ import javax.xml.stream.*; +import com.fasterxml.aalto.AaltoInputProperties; import org.codehaus.stax2.XMLInputFactory2; import com.fasterxml.aalto.impl.CommonConfig; @@ -41,9 +42,10 @@ public final class ReaderConfig final static int F_AUTO_CLOSE_INPUT = 0x2000; // Custom flags: + final static int F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES = 0x4000; /** - * These are the default settigs for XMLInputFactory. + * These are the default settings for XMLInputFactory. */ final static int DEFAULT_FLAGS = F_NS_AWARE @@ -55,7 +57,7 @@ public final class ReaderConfig | F_INTERN_NS_URIS // and will report CDATA as such (and not as CHARACTERS) | F_REPORT_CDATA - | F_PRESERVE_LOCATION + | F_PRESERVE_LOCATION ; private final static HashMap sProperties; @@ -98,7 +100,8 @@ public final class ReaderConfig // !!! Not really implemented, but let's recognize it sProperties.put(XMLInputFactory2.P_DTD_OVERRIDE, null); - // Custom ones? + // Custom ones + sProperties.put(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, Integer.valueOf(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES)); } /** @@ -280,6 +283,10 @@ public void doReportCData(boolean state) { setFlag(F_REPORT_CDATA, state); } + public void doRetainAttributeGeneralEntities(boolean state) { + setFlag(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, state); + } + /* /********************************************************************** /* Common accessors from CommonConfig @@ -412,6 +419,9 @@ public boolean willParseLazily() { public boolean hasInternNsURIsBeenEnabled() { return hasExplicitFlag(F_INTERN_NS_URIS); } + // // // Custom properties + + public boolean willRetainAttributeGeneralEntities() { return hasFlag(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES); } /* /********************************************************************** diff --git a/src/main/java/com/fasterxml/aalto/in/ReaderScanner.java b/src/main/java/com/fasterxml/aalto/in/ReaderScanner.java index e19184e..074d9a9 100644 --- a/src/main/java/com/fasterxml/aalto/in/ReaderScanner.java +++ b/src/main/java/com/fasterxml/aalto/in/ReaderScanner.java @@ -830,7 +830,7 @@ protected final int handleStartElement(char c) * simplify main method, which makes code more maintainable * and possibly easier for JIT/HotSpot to optimize. */ - private final int collectValue(int attrPtr, char quoteChar, PName attrName) + private int collectValue(int attrPtr, char quoteChar, PName attrName) throws XMLStreamException { char[] attrBuffer = _attrCollector.startNewValue(attrName, attrPtr); @@ -896,20 +896,22 @@ private final int collectValue(int attrPtr, char quoteChar, PName attrName) throwUnexpectedChar(c, "'<' not allowed in attribute value"); case XmlCharTypes.CT_AMP: { - int d = handleEntityInText(false); - if (d == 0) { // unexpanded general entity... not good - reportUnexpandedEntityInAttr(attrName, false); - } - // Ok; does it need a surrogate though? (over 16 bits) - if ((d >> 16) != 0) { - d -= 0x10000; - attrBuffer[attrPtr++] = (char) (0xD800 | (d >> 10)); - d = 0xDC00 | (d & 0x3FF); - if (attrPtr >= attrBuffer.length) { - attrBuffer = _attrCollector.valueBufferFull(); + if (!_config.willRetainAttributeGeneralEntities()) { + int d = handleEntityInText(false); + if (d == 0) { // unexpanded general entity... not good + reportUnexpandedEntityInAttr(attrName, false); + } + // Ok; does it need a surrogate though? (over 16 bits) + if ((d >> 16) != 0) { + d -= 0x10000; + attrBuffer[attrPtr++] = (char) (0xD800 | (d >> 10)); + d = 0xDC00 | (d & 0x3FF); + if (attrPtr >= attrBuffer.length) { + attrBuffer = _attrCollector.valueBufferFull(); + } } + c = (char) d; } - c = (char) d; } break; case XmlCharTypes.CT_ATTR_QUOTE: diff --git a/src/main/java/com/fasterxml/aalto/sax/SAXParserFactoryImpl.java b/src/main/java/com/fasterxml/aalto/sax/SAXParserFactoryImpl.java index 3776d5b..2494b1f 100644 --- a/src/main/java/com/fasterxml/aalto/sax/SAXParserFactoryImpl.java +++ b/src/main/java/com/fasterxml/aalto/sax/SAXParserFactoryImpl.java @@ -18,6 +18,7 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import com.fasterxml.aalto.AaltoInputProperties; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; @@ -36,7 +37,7 @@ public class SAXParserFactoryImpl extends SAXParserFactory { final InputFactoryImpl mStaxFactory; - + public SAXParserFactoryImpl() { // defaults should be fine... @@ -68,6 +69,8 @@ public boolean getFeature(String name) switch (stdFeat) { case IS_STANDALONE: // read-only, but only during parsing return true; + case EXTERNAL_GENERAL_ENTITIES: + return Boolean.FALSE.equals(mStaxFactory.getProperty(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES)); default: } } else { @@ -95,7 +98,8 @@ public void setFeature(String name, boolean enabled) switch (stdFeat) { case EXTERNAL_GENERAL_ENTITIES: - ok = !enabled; + mStaxFactory.setProperty(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, !enabled); + ok = true; break; case EXTERNAL_PARAMETER_ENTITIES: ok = !enabled; diff --git a/src/main/java/com/fasterxml/aalto/sax/SAXParserImpl.java b/src/main/java/com/fasterxml/aalto/sax/SAXParserImpl.java index cfbed04..9382a27 100644 --- a/src/main/java/com/fasterxml/aalto/sax/SAXParserImpl.java +++ b/src/main/java/com/fasterxml/aalto/sax/SAXParserImpl.java @@ -22,6 +22,7 @@ import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; +import com.fasterxml.aalto.AaltoInputProperties; import org.xml.sax.*; import org.xml.sax.ext.Attributes2; import org.xml.sax.ext.DeclHandler; @@ -262,6 +263,8 @@ public boolean getFeature(String name) case IS_STANDALONE: // read-only, but only during parsing // !!! TBI return true; + case EXTERNAL_GENERAL_ENTITIES: + return Boolean.FALSE.equals(_staxFactory.getProperty(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES)); default: } } else { diff --git a/src/main/java/com/fasterxml/aalto/sax/SAXUtil.java b/src/main/java/com/fasterxml/aalto/sax/SAXUtil.java index 57a77aa..be9db8f 100644 --- a/src/main/java/com/fasterxml/aalto/sax/SAXUtil.java +++ b/src/main/java/com/fasterxml/aalto/sax/SAXUtil.java @@ -71,8 +71,6 @@ public static SAXProperty findStdProperty(String featURI) public static Boolean getFixedStdFeatureValue(SAXFeature stdFeat) { switch (stdFeat) { - case EXTERNAL_GENERAL_ENTITIES: // not yet implemented - return Boolean.FALSE; case EXTERNAL_PARAMETER_ENTITIES: // not yet implemented return Boolean.FALSE; case IS_STANDALONE: // read-only, but only during parsing diff --git a/src/test/java/com/fasterxml/aalto/sax/TestEntityResolver.java b/src/test/java/com/fasterxml/aalto/sax/TestEntityResolver.java index fa2f118..7666a20 100644 --- a/src/test/java/com/fasterxml/aalto/sax/TestEntityResolver.java +++ b/src/test/java/com/fasterxml/aalto/sax/TestEntityResolver.java @@ -1,9 +1,12 @@ package com.fasterxml.aalto.sax; import java.io.*; +import java.util.concurrent.CountDownLatch; import javax.xml.parsers.SAXParser; +import javax.xml.stream.XMLInputFactory; +import com.fasterxml.aalto.stax.InputFactoryImpl; import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; @@ -47,6 +50,44 @@ public void testWithDummyExtSubset() } } + public void testRetainAttributeEntityReference() + throws Exception + { + final String XML = + "\n" + +""; + + SAXParserFactoryImpl spf = new SAXParserFactoryImpl(); + SAXParser sp = spf.newSAXParser(); + DefaultHandler h = new DefaultHandler(); + + try { + sp.parse(new InputSource(new StringReader(XML)), h); + fail(); + } catch (SAXException e) { + verifyException(e, "General entity reference (&replace-me;) encountered in entity expanding mode: operation not (yet) implemented\n at [row,col {unknown-source}]: [2,22]"); + } + + SAXParserFactoryImpl spfKeepEntityReferences = new SAXParserFactoryImpl(); + spfKeepEntityReferences.setFeature("http://xml.org/sax/features/external-general-entities", false); + SAXParser spKeepEntityReferences = spfKeepEntityReferences.newSAXParser(); + + final CountDownLatch countDownLatch = new CountDownLatch(1); + spKeepEntityReferences.parse(new InputSource(new StringReader(XML)), new DefaultHandler() { + @Override + public void startElement(String uri, String localName, String qName, Attributes attributes) { + assertEquals("root", localName); + assertEquals("root", qName); + assertEquals(1, attributes.getLength()); + assertEquals("&replace-me;", attributes.getValue(0)); + + countDownLatch.countDown(); + } + }); + + assertEquals(0, countDownLatch.getCount()); + } + static class MyResolver implements EntityResolver { diff --git a/src/test/java/com/fasterxml/aalto/sax/TestSAXParserFactoryImpl.java b/src/test/java/com/fasterxml/aalto/sax/TestSAXParserFactoryImpl.java new file mode 100644 index 0000000..2c3c438 --- /dev/null +++ b/src/test/java/com/fasterxml/aalto/sax/TestSAXParserFactoryImpl.java @@ -0,0 +1,17 @@ +package com.fasterxml.aalto.sax; + +import org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; + +public class TestSAXParserFactoryImpl extends base.BaseTestCase { + + public void testSetGetFeatureExternalGeneralEntities() throws SAXNotRecognizedException, SAXNotSupportedException { + SAXParserFactoryImpl saxParserFactory = new SAXParserFactoryImpl(); + saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); + assertFalse(saxParserFactory.getFeature("http://xml.org/sax/features/external-general-entities")); + + saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", true); + assertTrue(saxParserFactory.getFeature("http://xml.org/sax/features/external-general-entities")); + } + +}