Skip to content

Commit

Permalink
Add testingParseUnparseAPIMode Tunable
Browse files Browse the repository at this point in the history
- currently during testing we parse/unparse using both the sax and non-sax APIs, which leads to issues such as trace output appearing twice for the same test, which is confusing. We also run the parse for all our infoset outputters. With this tunable, we default to the more efficient single infoset outputter (scalaxml) and single API (non-sax) parse/unparse.
- we convert TDMLInfosetOutputter to a trait so the Full and Limited subclasses can extend it as well as TeeInfosetOutputter
- the tunable has 2 options: limited and full, with limited being the default and full being our current mode that uses both APIs and all infoset outputters.
- add test showing use of full mode

DAFFODIL-2904
  • Loading branch information
olabusayoT committed Nov 6, 2024
1 parent 1467784 commit 0ee6b9a
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,15 @@
</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="testingParseUnparseAPIMode" type="daf:TunableTestingParseUnparseAPIMode" default="limited" minOccurs="0">
<xs:annotation>
<xs:documentation>
When running parse/unparse on tests, this tunable controls whether we use only the non-SAX API with a single
infoset inputter/outputter (i.e., 'limited' mode) or use both the SAX and non-SAX APIs and
check that every infoset inputter/outputter gives the same results (i.e., 'full' mode).
</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="unqualifiedPathStepPolicy" type="daf:TunableUnqualifiedPathStepPolicy" default="noNamespace" minOccurs="0">
<xs:annotation>
<xs:documentation>
Expand Down Expand Up @@ -749,6 +758,13 @@
</xs:list>
</xs:simpleType>

<!--
  Allowed values for the testingParseUnparseAPIMode tunable.
  The element that uses this type declares 'limited' as its default.
-->
<xs:simpleType name="TunableTestingParseUnparseAPIMode">
<xs:restriction base="xs:token">
<xs:enumeration value="limited" />
<xs:enumeration value="full" />
</xs:restriction>
</xs:simpleType>

<xs:element name="dfdlConfig">
<xs:complexType>
<xs:sequence>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.apache.daffodil.core.compiler.Compiler
import org.apache.daffodil.core.dsom.ExpressionCompilers
import org.apache.daffodil.io.InputSourceDataInputStream
import org.apache.daffodil.lib.Implicits.using
import org.apache.daffodil.lib.api.TestingParseUnparseAPIMode.Full
import org.apache.daffodil.lib.api._
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.externalvars.Binding
Expand Down Expand Up @@ -209,14 +210,22 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor)
def parse(uri: java.net.URI, lengthLimitInBits: Long): TDMLParseResult = {
val url = uri.toURL
val dpInputStream = url.openStream()
val saxInputStream = url.openStream()
doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)
if (dp.tunables.testingParseUnparseAPIMode == Full) {
val saxInputStream = url.openStream()
doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)

Check warning on line 215 in daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala

View check run for this annotation

Codecov / codecov/patch

daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala#L213-L215

Added lines #L213 - L215 were not covered by tests
} else {
doParseWithNonSaxAPI(dpInputStream, lengthLimitInBits)

Check warning on line 217 in daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala

View check run for this annotation

Codecov / codecov/patch

daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala#L217

Added line #L217 was not covered by tests
}
}

def parse(arr: Array[Byte], lengthLimitInBits: Long): TDMLParseResult = {
val dpInputStream = new ByteArrayInputStream(arr)
val saxInputStream = new ByteArrayInputStream(arr)
doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)
if (dp.tunables.testingParseUnparseAPIMode == Full) {
val saxInputStream = new ByteArrayInputStream(arr)
doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)
} else {
doParseWithNonSaxAPI(dpInputStream, lengthLimitInBits)
}
}

override def parse(is: java.io.InputStream, lengthLimitInBits: Long): TDMLParseResult = {
Expand Down Expand Up @@ -252,21 +261,47 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor)
infosetXML: scala.xml.Node,
outStream: java.io.OutputStream
): TDMLUnparseResult = {
val bos = new ByteArrayOutputStream()
val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
osw.flush()
osw.close()
val saxInstream = new ByteArrayInputStream(bos.toByteArray)
doUnparseWithBothApis(inputter, saxInstream, outStream)
if (dp.tunables.testingParseUnparseAPIMode == Full) {
val bos = new ByteArrayOutputStream()
val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
osw.flush()
osw.close()
val saxInstream = new ByteArrayInputStream(bos.toByteArray)
doUnparseWithBothApis(inputter, saxInstream, outStream)
} else {
doUnparseWithNonSaxApi(inputter, outStream)
}
}

/**
 * Parses the document under test using only the non-SAX API, collecting the
 * infoset with a [[TDMLInfosetOutputterLimited]].
 *
 * @param dpInputStream stream supplying the data to parse
 * @param lengthLimitInBits length of the document in bits; it is applied as a
 *                          bit limit only when it is not a multiple of 8
 * @return the parse result together with the outputter that captured the infoset
 */
def doParseWithNonSaxAPI(
  dpInputStream: java.io.InputStream,
  lengthLimitInBits: Long
): TDMLParseResult = {
  val limitedOutputter = new TDMLInfosetOutputterLimited()
  limitedOutputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix)

  using(InputSourceDataInputStream(dpInputStream)) { input =>
    // The limit represents the length of the document under test. A bit
    // limit is only installed when that length stops part-way through a byte.
    val endsMidByte = lengthLimitInBits % 8 != 0
    if (endsMidByte) {
      Assert.usage(lengthLimitInBits >= 0)
      input.setBitLimit0b(MaybeULong(lengthLimitInBits))
    }

    new DaffodilTDMLParseResult(dp.parse(input, limitedOutputter), limitedOutputter)
  }
}

def doParseWithBothApis(
dpInputStream: java.io.InputStream,
saxInputStream: java.io.InputStream,
lengthLimitInBits: Long
): TDMLParseResult = {
val outputter = new TDMLInfosetOutputter()
val outputter = new TDMLInfosetOutputterFull()
outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix)

val xri = dp.newXMLReaderInstance
Expand Down Expand Up @@ -306,6 +341,19 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor)
}
}

/**
 * Unparses the infoset supplied by `dpInputter` using only the non-SAX API,
 * writing the resulting data to `dpOutputStream`.
 *
 * The channel wrapping `dpOutputStream` is closed before this method returns,
 * including when the unparse call throws.
 *
 * @param dpInputter infoset inputter providing the infoset to unparse
 * @param dpOutputStream stream that receives the unparsed data
 * @return the unparse result paired with the output stream it was written to
 */
def doUnparseWithNonSaxApi(
  dpInputter: TDMLInfosetInputter,
  dpOutputStream: java.io.OutputStream
): DaffodilTDMLUnparseResult = {

  val dpOutputChannel = java.nio.channels.Channels.newChannel(dpOutputStream)

  val actualDP =
    try {
      dp.unparse(dpInputter, dpOutputChannel).asInstanceOf[UnparseResult]
    } finally {
      // Close on every path so a failing unparse cannot leave the channel open.
      dpOutputChannel.close()
    }

  new DaffodilTDMLUnparseResult(actualDP, dpOutputStream)
}

def doUnparseWithBothApis(
dpInputter: TDMLInfosetInputter,
saxInputStream: java.io.InputStream,
Expand Down Expand Up @@ -408,11 +456,11 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor)
final class DaffodilTDMLParseResult(actual: DFDL.ParseResult, outputter: TDMLInfosetOutputter)
extends TDMLParseResult {

override def getResult: Node = outputter.getResult()
override def getResult: Node = outputter.getResult

override def getBlobPaths: Seq[Path] = outputter.getBlobPaths()

def inputter = outputter.toInfosetInputter()
def inputter = outputter.toInfosetInputter

override def isProcessingError: Boolean = actual.isProcessingError

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ package org.apache.daffodil.processor.tdml

import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import scala.xml.Node

import org.apache.daffodil.runtime1.infoset.InfosetOutputter
import org.apache.daffodil.runtime1.infoset.JDOMInfosetInputter
import org.apache.daffodil.runtime1.infoset.JDOMInfosetOutputter
import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter
Expand All @@ -33,7 +35,23 @@ import org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter

class TDMLInfosetOutputter
/**
 * A TDMLInfosetOutputter that forwards infoset events to a single
 * ScalaXMLInfosetOutputter via TeeInfosetOutputter.
 */
class TDMLInfosetOutputterLimited
// Early-initializer block: these fields are defined before the
// TeeInfosetOutputter constructor runs so that `outputters` can be passed
// to it as constructor arguments.
extends {
private val scalaOut = new ScalaXMLInfosetOutputter()
private val outputters: Seq[InfosetOutputter] = Seq(scalaOut)
}
with TeeInfosetOutputter(outputters: _*)
with TDMLInfosetOutputter {

// The result is whatever the Scala XML outputter produced.
override def getResult: Node = scalaOut.getResult

// Builds an inputter backed by the Scala XML result; the second argument
// to TDMLInfosetInputter is an empty sequence in this mode.
override def toInfosetInputter: TDMLInfosetInputter = {
val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
new TDMLInfosetInputter(scalaIn, Seq())
}
}

class TDMLInfosetOutputterFull
extends {
private val jsonStream = new ByteArrayOutputStream()
val xmlStream = new ByteArrayOutputStream()
Expand All @@ -44,13 +62,15 @@ class TDMLInfosetOutputter
private val jsonOut = new JsonInfosetOutputter(jsonStream, false)
private val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)

private val outputters = Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)
private val outputters: Seq[InfosetOutputter] =
Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)
}
with TeeInfosetOutputter(outputters: _*) {
with TeeInfosetOutputter(outputters: _*)
with TDMLInfosetOutputter {

def getResult() = scalaOut.getResult
override def getResult: Node = scalaOut.getResult

def toInfosetInputter() = {
override def toInfosetInputter: TDMLInfosetInputter = {
val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
val jdomIn = new JDOMInfosetInputter(jdomOut.getResult)
val w3cdomIn = new W3CDOMInfosetInputter(w3cdomOut.getResult)
Expand All @@ -63,3 +83,10 @@ class TDMLInfosetOutputter
new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn, nullIn))
}
}

/**
 * An InfosetOutputter that can additionally expose its captured infoset as a
 * Scala XML node and as a TDMLInfosetInputter.
 */
trait TDMLInfosetOutputter extends InfosetOutputter {

/** The captured infoset as a Scala XML node. */
def getResult: Node

/** A TDMLInfosetInputter built from the captured infoset. */
def toInfosetInputter: TDMLInfosetInputter
}
Original file line number Diff line number Diff line change
Expand Up @@ -237,4 +237,38 @@ class TestTDMLRunnerConfig {
assertTrue(msg.contains("ambiguous"))
assertTrue(msg.contains("testConfigFile.xml"))
}

// Runs a one-pass round-trip parser test whose embedded config sets the
// testingParseUnparseAPIMode tunable to "full".
@Test def testGoodConfigTestingParseUnparseAPIMode() = {
val testSuite =
<tdml:testSuite suiteName="theSuiteName" xmlns:daf={daf} xmlns:ex={example}
xmlns:tdml={tdml} xmlns:dfdl={dfdl} xmlns:xsd={xsd} xmlns:xs={
xsd
} xmlns:xsi={xsi}>
<tdml:defineSchema name="mySchema">
<xs:include schemaLocation="/org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
<dfdl:format ref="GeneralFormat"/>
<xs:element name="e1" type="xs:int" dfdl:lengthKind="explicit" dfdl:length="1"/>
</tdml:defineSchema>
<tdml:defineConfig name="myConfig">
<daf:tunables xmlns="http://www.w3.org/2001/XMLSchema" xmlns:xs="http://www.w3.org/2001/XMLSchema">
<daf:testingParseUnparseAPIMode>full</daf:testingParseUnparseAPIMode>
</daf:tunables>
</tdml:defineConfig>
<tdml:parserTestCase xmlns={
tdml
} name="test1" root="e1" model="mySchema" config="myConfig" roundTrip="onePass">
<tdml:document>2</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
<ex:e1>2</ex:e1>
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>
</tdml:testSuite>

// There are no explicit assertions here; the outcome relies on runOneTest.
// NOTE(review): presumably runOneTest throws when the test case fails — confirm.
val runner = new Runner(testSuite)
runner.runOneTest("test1")
runner.reset
}

}

0 comments on commit 0ee6b9a

Please sign in to comment.