diff --git a/release-notes/CREDITS b/release-notes/CREDITS index 1ecffc2..7704bfb 100644 --- a/release-notes/CREDITS +++ b/release-notes/CREDITS @@ -25,3 +25,7 @@ Mike Conley (conleym@github) Michiel Borkent (borkdude@github) * Reported #47: NPE when trying to parse document with DTD declaration (1.2.1) + +Claude Mamo (otcdlink-simpleuser@github) + * Contributed #65: Support disabling General Entity Reference replacement in attribute values + (1.3.0) diff --git a/release-notes/VERSION b/release-notes/VERSION index 1a7c8f5..56bd173 100644 --- a/release-notes/VERSION +++ b/release-notes/VERSION @@ -6,6 +6,8 @@ Project: aalto-xml 1.3.0 (not yet released) +#65: Support disabling General Entity Reference replacement in attribute values + (contributed by Claude M) * Fix minor offset check bug in `AsyncByteArrayScanner`/`AsyncByteBufferScanner` * Various minor fixes based on lgtm.com suggestions diff --git a/src/main/java/com/fasterxml/aalto/AaltoInputProperties.java b/src/main/java/com/fasterxml/aalto/AaltoInputProperties.java index cfa6a68..f60a42d 100644 --- a/src/main/java/com/fasterxml/aalto/AaltoInputProperties.java +++ b/src/main/java/com/fasterxml/aalto/AaltoInputProperties.java @@ -4,13 +4,27 @@ * Class that contains constant for property names used to configure * cursor and event readers produced by Aalto implementation of * {@link javax.xml.stream.XMLInputFactory}. + * + * @since 1.3 */ -public final class AaltoInputProperties { - +public final class AaltoInputProperties +{ /** - * Feature controlling whether general entities in attributes are retained. - * - * @since 1.3 + * Feature controlling whether general entities in attributes are retained + * as-is without processing ({@code true}) or replaced as per standard + * XML processing rules ({@code false}). 
+ * If enabled, instead of regular General Entity expansion, possible general + * entities in Attribute values will be left exactly as-is, with no processing; + * as such they cannot be distinguished from regular textual content. + *<p>
+ * The main reason for enabling this non-standard property is to avoid errors + * in cases where content contains general entity references in attribute values, + * but no processing is allowed (for example, for security reasons). + *<p>
+ * Property defaults to {@code false} for XML standard compliancy but may + * be enabled to avoid processing errors (but note that caller will necessarily + * lose information as unexpanded entity cannot be distinguished from regular + * attribute textual content). */ public final static String P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES = "com.fasterxml.aalto.retainAttributeGeneralEntities"; } diff --git a/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java b/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java index 538052c..8467024 100644 --- a/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java +++ b/src/main/java/com/fasterxml/aalto/in/ReaderConfig.java @@ -27,27 +27,27 @@ public final class ReaderConfig public final static int STANDALONE_NO = 2; // Standard Stax flags: - final static int F_NS_AWARE = 0x0001; - final static int F_COALESCING = 0x0002; - final static int F_DTD_AWARE = 0x0004; - final static int F_DTD_VALIDATING = 0x0008; - final static int F_EXPAND_ENTITIES = 0x0010; + protected final static int F_NS_AWARE = 0x0001; + protected final static int F_COALESCING = 0x0002; + protected final static int F_DTD_AWARE = 0x0004; + protected final static int F_DTD_VALIDATING = 0x0008; + protected final static int F_EXPAND_ENTITIES = 0x0010; // Standard Stax2 flags: - final static int F_LAZY_PARSING = 0x0100; - final static int F_INTERN_NAMES = 0x0200; - final static int F_INTERN_NS_URIS = 0x0400; - final static int F_REPORT_CDATA = 0x0800; - final static int F_PRESERVE_LOCATION = 0x1000; - final static int F_AUTO_CLOSE_INPUT = 0x2000; + protected final static int F_LAZY_PARSING = 0x0100; + protected final static int F_INTERN_NAMES = 0x0200; + protected final static int F_INTERN_NS_URIS = 0x0400; + protected final static int F_REPORT_CDATA = 0x0800; + protected final static int F_PRESERVE_LOCATION = 0x1000; + protected final static int F_AUTO_CLOSE_INPUT = 0x2000; // Custom flags: - final static int F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES = 0x4000; + protected 
final static int F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES = 0x4000; /** * These are the default settings for XMLInputFactory. */ - final static int DEFAULT_FLAGS = + protected final static int DEFAULT_FLAGS = F_NS_AWARE | F_DTD_AWARE | F_EXPAND_ENTITIES @@ -57,8 +57,8 @@ public final class ReaderConfig | F_INTERN_NS_URIS // and will report CDATA as such (and not as CHARACTERS) | F_REPORT_CDATA - | F_PRESERVE_LOCATION - ; + | F_PRESERVE_LOCATION + ; private final static HashMap sProperties; static { @@ -100,8 +100,11 @@ public final class ReaderConfig // !!! Not really implemented, but let's recognize it sProperties.put(XMLInputFactory2.P_DTD_OVERRIDE, null); - // Custom ones - sProperties.put(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, Integer.valueOf(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES)); + // Custom ones: + + // [aalto-xml#65]: Allow disabling processing of GEs in attribute values: + sProperties.put(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, + Integer.valueOf(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES)); } /** @@ -141,6 +144,7 @@ public final class ReaderConfig private XMLResolver mResolver; private IllegalCharHandler illegalCharHandler; + /* /********************************************************************** /* Buffer recycling: @@ -283,6 +287,14 @@ public void doReportCData(boolean state) { setFlag(F_REPORT_CDATA, state); } + /** + * Method for enabling or disabling + * {@link AaltoInputProperties#P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES}. + * + * @param state Whether to enable or disable property + * + * @since 1.3 + */ public void doRetainAttributeGeneralEntities(boolean state) { setFlag(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, state); } @@ -421,6 +433,14 @@ public boolean willParseLazily() { // // // Custom properties + /** + * Accessor for checking configured state of + * {@link AaltoInputProperties#P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES}. 
+ * + * @return Whether the property is enabled or disabled + * + * @since 1.3 + */ public boolean willRetainAttributeGeneralEntities() { return hasFlag(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES); } /* diff --git a/src/test/java/stream/TestGeneralEntityHandling.java b/src/test/java/stream/TestGeneralEntityHandling.java new file mode 100644 index 0000000..0cb0c1b --- /dev/null +++ b/src/test/java/stream/TestGeneralEntityHandling.java @@ -0,0 +1,54 @@ +package stream; + +import java.io.StringReader; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.codehaus.stax2.XMLInputFactory2; + +import com.fasterxml.aalto.AaltoInputProperties; + +// Mostly for [aalto-xml#65] +public class TestGeneralEntityHandling extends base.BaseTestCase +{ + private final XMLInputFactory2 VANILLA_F = newInputFactory(); + + private final XMLInputFactory2 RETAIN_ATTR_GE_F = newInputFactory(); + { + RETAIN_ATTR_GE_F.setProperty(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, + true); + } + + public void testAttributeGEHandling() throws Exception + { + final String DOC = "Text"; + + // First: with Vanilla, should just fail + XMLStreamReader sr = VANILLA_F.createXMLStreamReader( + new StringReader(DOC)); + try { + sr.next(); + fail("Should not pass"); + } catch (XMLStreamException e) { + verifyException(e, "General entity reference (&ent;) encountered"); + } + sr.close(); + + // But with new (1.3) setting, can be tolerated + sr = RETAIN_ATTR_GE_F.createXMLStreamReader( + new StringReader(DOC)); + assertTokenType(START_ELEMENT, sr.next()); + assertEquals("root", sr.getLocalName()); + assertEquals(1, sr.getAttributeCount()); + assertEquals("Entity: &ent;", sr.getAttributeValue(0)); + + assertTokenType(CHARACTERS, sr.next()); + // Assume that value is not split (as per impl) + assertEquals("Text", sr.getText()); + + assertTokenType(END_ELEMENT, sr.next()); + assertTokenType(END_DOCUMENT, sr.next()); + sr.close(); + } +}