Skip to content

Commit

Permalink
De-personalize Diagnostic File URLs in Compiled Schemas
Browse files Browse the repository at this point in the history
- pass a java.io.File to URISchemaSource in addition to the URI; update usages
- update diagnostics to remove systemid but keep cause, line number and column number
- remove maxParentDirectoriesForDiagnostics tunable and diagnosticsStripLocationInfo tdml attribute, as they are no longer used and have been replaced by diagnosticFile
- return URISchemaSource from resolveSchemaLocation and fileResourceURIConverter
- update URISchemaSource constructor to accept 2 arguments instead of tuple
- add XercesSchemaFileLocation class, which uses xercesError and schemaFileLocation to create a more complete schemaFileLocation
- if we're using a fakeSchemaDocXML, overwrite the diagnosticFile returned from resolveSchemaLocation to use the xmlSchemaDocument.diagnosticFile
- use Paths.resolveSibling for diagnosticFilepath resolution against context diagnosticFile
- set diagnosticFile to empty string for DaffodilXMLLoader with comment explaining why
- create uriToDiagnosticFile function with heuristic for jar/file/null scheme with separate handling
- pass in fake context in cwd to resolveSchemaLocation call from main
- clarification comments
- update tests to remove paths with unix path separators
- update/add tests

DAFFODIL-2195
  • Loading branch information
olabusayoT committed Mar 20, 2024
1 parent 9b12912 commit d024f17
Show file tree
Hide file tree
Showing 37 changed files with 386 additions and 290 deletions.
68 changes: 41 additions & 27 deletions daffodil-cli/src/main/scala/org/apache/daffodil/cli/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream

implicit def rootNSConverter = org.rogach.scallop.singleArgConverter[RefQName](qnameConvert _)

implicit def fileResourceURIConverter = singleArgConverter[URI]((s: String) => {
implicit def fileResourceURIConverter = singleArgConverter[URISchemaSource]((s: String) => {
val optResolved =
try {
val uri =
Expand Down Expand Up @@ -256,19 +256,22 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream
}
}
// At this point we have a valid URI, which could be absolute or relative, with relative
// URIs resolved from the current working directory. We can convert this to a string and
// pass it to resolveSchemaLocation to find the actual file or resource
val cwd = Paths.get("").toUri
XMLUtils.resolveSchemaLocation(uri.toString, Some(cwd))
// URIs resolved from the current working directory. We create a fake contextPath that represents
// a fake file in the current working directory and pass that as the contextSource to the
// resolveSchemaLocation function to find the actual file or resource. This is necessary because
// resolveSchemaLocation expects a context that is a file for diagnostic purposes.
val contextPath = Paths.get("fakeContext.dfdl.xsd")
val contextSource = URISchemaSource(contextPath.toFile, contextPath.toUri)
XMLUtils.resolveSchemaLocation(uri.toString, Some(contextSource))
} catch {
case _: Exception => throw new Exception(s"Could not find file or resource $s")
}
optResolved match {
case Some((uri, relToAbs)) => {
case Some((uriSchemaSource, relToAbs)) => {
if (relToAbs) {
Logger.log.warn(s"Found relative path on classpath absolutely, did you mean /$s")
}
uri
uriSchemaSource
}
case None => {
throw new Exception(s"Could not find file or resource $s")
Expand Down Expand Up @@ -377,7 +380,11 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream
"Root element to use. Can be prefixed with {namespace}. Must be a top-level element. Defaults to first top-level element of DFDL schema.",
)
val schema =
opt[URI]("schema", argName = "file", descr = "DFDL schema to use to create parser")(
opt[URISchemaSource](
"schema",
argName = "file",
descr = "DFDL schema to use to create parser",
)(
fileResourceURIConverter,
)
val stream = toggle(
Expand Down Expand Up @@ -488,7 +495,11 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream
"Root element to use. Can be prefixed with {namespace}. Must be a top-level element. Defaults to first top-level element of DFDL schema.",
)
val schema =
opt[URI]("schema", argName = "file", descr = "DFDL schema to use to create parser")(
opt[URISchemaSource](
"schema",
argName = "file",
descr = "DFDL schema to use to create parser",
)(
fileResourceURIConverter,
)
val stream = toggle(
Expand Down Expand Up @@ -576,7 +587,7 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream
descr =
"Root element to use. Can be prefixed with {namespace}. Must be a top-level element. Defaults to first top-level element of DFDL schema.",
)
val schema = opt[URI](
val schema = opt[URISchemaSource](
"schema",
required = true,
argName = "file",
Expand Down Expand Up @@ -685,7 +696,11 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream
"Root element to use. Can be prefixed with {namespace}. Must be a top-level element. Defaults to first top-level element of DFDL schema.",
)
val schema =
opt[URI]("schema", argName = "file", descr = "DFDL schema to use to create parser")(
opt[URISchemaSource](
"schema",
argName = "file",
descr = "DFDL schema to use to create parser",
)(
fileResourceURIConverter,
)
val threads = opt[Int](
Expand Down Expand Up @@ -774,7 +789,7 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream
descr =
"Root element to use. Can be prefixed with {namespace}. Must be a top-level element. Defaults to first top-level element of DFDL schema.",
)
val schema = opt[URI](
val schema = opt[URISchemaSource](
"schema",
required = true,
argName = "file",
Expand Down Expand Up @@ -820,7 +835,7 @@ class CLIConf(arguments: Array[String], stdout: PrintStream, stderr: PrintStream
descr =
"Output file to write the encoded/decoded file to. If not given or is -, data is written to stdout.",
)
val schema = opt[URI](
val schema = opt[URISchemaSource](
"schema",
argName = "file",
descr = "DFDL schema to use for schema aware encoding/decoding.",
Expand Down Expand Up @@ -1018,7 +1033,7 @@ class Main(
}

def createProcessorFromSchema(
schema: URI,
schemaSource: URISchemaSource,
rootNS: Option[RefQName],
path: Option[String],
tunablesMap: Map[String, String],
Expand All @@ -1039,7 +1054,6 @@ class Main(
// to also include the call to pf.onPath. (which is the last phase
// of compilation, where it asks for the parser)
//
val schemaSource = URISchemaSource(schema)
val res = Timer.getResult(
"compiling", {
val processorFactory = compiler.compileSource(schemaSource)
Expand Down Expand Up @@ -1067,7 +1081,7 @@ class Main(
}

def createGeneratorFromSchema(
schema: URI,
schemaSource: URISchemaSource,
rootNS: Option[RefQName],
tunables: Map[String, String],
language: String,
Expand All @@ -1081,7 +1095,6 @@ class Main(
}
}

val schemaSource = URISchemaSource(schema)
val cg = Timer.getResult(
"compiling", {
val processorFactory = compiler.compileSource(schemaSource)
Expand Down Expand Up @@ -1164,11 +1177,10 @@ class Main(
case Some(file) => new FileOutputStream(file)
}

val infosetType = parseOpts.infosetType.toOption.get
val infosetHandler = InfosetType.getInfosetHandler(
parseOpts.infosetType.toOption.get,
parseOpts.infosetType(),
processor,
parseOpts.schema.toOption,
parseOpts.schema.map(_.uri).toOption,
forPerformance = false,
)

Expand Down Expand Up @@ -1340,11 +1352,10 @@ class Main(
}
}

val infosetType = performanceOpts.infosetType.toOption.get
val infosetHandler = InfosetType.getInfosetHandler(
infosetType,
performanceOpts.infosetType(),
processor,
performanceOpts.schema.toOption,
performanceOpts.schema.map(_.uri).toOption,
forPerformance = true,
)

Expand Down Expand Up @@ -1505,11 +1516,10 @@ class Main(
var keepUnparsing = maybeScanner.isEmpty || maybeScanner.get.hasNext
var exitCode = ExitCode.Success

val infosetType = unparseOpts.infosetType.toOption.get
val infosetHandler = InfosetType.getInfosetHandler(
unparseOpts.infosetType.toOption.get,
unparseOpts.infosetType(),
processor,
unparseOpts.schema.toOption,
unparseOpts.schema.map(_.uri).toOption,
forPerformance = false,
)

Expand Down Expand Up @@ -1806,7 +1816,11 @@ class Main(

val exiFactory: Option[EXIFactory] =
try {
Some(EXIInfosetHandler.createEXIFactory(exiOpts.schema.toOption))
Some(
EXIInfosetHandler.createEXIFactory(
exiOpts.schema.map(_.uri).toOption,
),
)
} catch {
case e: EXIException => {
Logger.log.error(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,4 +318,20 @@ class TestCLISaveParser {
}
}

/**
 * Runs save-parser against an invalid schema and checks the diagnostic that is
 * reported, so the location text of the error (line, column and schema path)
 * can be inspected for leaked information.
 */
@Test def test_CLI_Saving_SaveParser_error(): Unit = {
  val invalidSchema = path("daffodil-sapi/src/test/resources/test/sapi/mySchema6.dfdl.xsd")
  // Location text the CLI is expected to print for the failing schema
  val expectedContext =
    s"Schema context: Location line 32 column 74 in ${invalidSchema.normalize()}"

  withTempFile { savedParser =>
    runCLI(args"save-parser -s $invalidSchema $savedParser") { session =>
      session.expectErr("[error]")
      session.expectErr(expectedContext)
    }(ExitCode.UnableToCreateProcessor)
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ class Compiler private (
optRootName: Option[String] = None,
optRootNamespace: Option[String] = None,
): ProcessorFactory = {
val source = URISchemaSource(file.toURI)
val source = URISchemaSource(file, file.toURI)
compileSource(source, optRootName, optRootNamespace)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@

package org.apache.daffodil.core.dsom

import java.io.File

import org.apache.daffodil.core.dsom.IIUtils._
import org.apache.daffodil.lib.api.Diagnostic
import org.apache.daffodil.lib.api._
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.exceptions.SchemaFileLocation
import org.apache.daffodil.lib.exceptions.XercesSchemaFileLocation
import org.apache.daffodil.lib.util.Logger
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.lib.xml.DaffodilXMLLoader
Expand All @@ -45,16 +48,30 @@ class DFDLSchemaFileLoadErrorHandler(schemaFileLocation: SchemaFileLocation)
private def loaderErrors = loaderErrors_
private def loaderWarnings = loaderWarnings_

private def loaderSDEs: Seq[Diagnostic] = loaderErrors.map {
new SchemaDefinitionError(schemaFileLocation, "Error loading schema due to %s", _)
// Wraps every error collected by the loader in a SchemaDefinitionError.
private def loaderSDEs: Seq[Diagnostic] =
  for (loadError <- loaderErrors) yield {
    val cause = loadError.getMessage
    // The schemaFileLocation given to this handler lacks the line, column and file info
    // that the Xerces error carries, so a more complete location (built from both) is
    // passed to the diagnostic instead.
    val completeLocation = new XercesSchemaFileLocation(loadError, schemaFileLocation)
    new SchemaDefinitionError(completeLocation, "Error loading schema due to %s", cause)
  }

private def loaderSDWs: Seq[Diagnostic] = loaderWarnings.map {
private def loaderSDWs: Seq[Diagnostic] = loaderWarnings.map { w =>
val warnMessage = w.getMessage
// We create a new SchemaFileLocation (xsfl) because the Xerces warning carries line and column info
// that the schemaFileLocation passed in doesn't contain, so we can pass along this more
// complete SchemaFileLocation
val xsfl = new XercesSchemaFileLocation(w, schemaFileLocation)
new SchemaDefinitionWarning(
WarnID.XmlParser,
schemaFileLocation,
xsfl,
"Warning loading schema due to %s",
_,
warnMessage,
)
}

Expand Down Expand Up @@ -139,7 +156,9 @@ final class DFDLSchemaFile(

override lazy val uriString = schemaSource.uriForLoading.toString

override protected lazy val diagnosticDebugNameImpl = schemaSource.uriForLoading.toString
/** The diagnostic file supplied by this schema file's schemaSource. */
override lazy val diagnosticFile: File =
  schemaSource.diagnosticFile

override protected lazy val diagnosticDebugNameImpl = diagnosticFile.getPath

lazy val diagnosticChildren =
Nil // no recursive descent. We just want the loader's validation errors.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import org.apache.daffodil.core.dsom.IIUtils._
import org.apache.daffodil.lib.api.DaffodilSchemaSource
import org.apache.daffodil.lib.api.URISchemaSource
import org.apache.daffodil.lib.api.WarnID
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.util.Delay
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.lib.xml.NS
Expand Down Expand Up @@ -195,15 +196,28 @@ abstract class IIBase(
protected final lazy val resolvedSchemaLocation: Option[DaffodilSchemaSource] =
LV('resolvedSchemaLocation) {
val res = schemaLocationProperty.flatMap { slText =>
val enclosingSchemaURI = schemaFile.map { _.schemaSource.uriForLoading }
val optURI = XMLUtils.resolveSchemaLocation(slText, enclosingSchemaURI)
val optSource = optURI.map { case (uri, relToAbs) =>
val enclosingSchemaSource = schemaFile.map { sf =>
sf.schemaSource
}
val optURISchemaSource =
XMLUtils.resolveSchemaLocation(slText, enclosingSchemaSource)
val optSource = optURISchemaSource.map { case (uriSchemaSource, relToAbs) =>
schemaDefinitionWarningWhen(
WarnID.DeprecatedRelativeSchemaLocation,
relToAbs,
s"Resolving relative schemaLocations absolutely is deprecated. Did you mean /$slText",
)
URISchemaSource(uri)
// if isBootStrapSD is true, we assume we are using the fakeXMLSchemaDocument, which means
// we will be passing in and receiving back an absolute diagnosticFilepath from resolveSchemaLocation.
// In just this case, we want to ignore that absolute filepath and use the diagnosticFilepath
// from main, which is the XMLSchemaDocument diagnosticFilepath
val finalUriSchemaSource = if (xmlSchemaDocument.isBootStrapSD) {
Assert.invariant(enclosingSchemaSource.isEmpty)
URISchemaSource(xmlSchemaDocument.diagnosticFile, uriSchemaSource.uri)
} else {
uriSchemaSource
}
finalUriSchemaSource
}
optSource
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import org.apache.daffodil.lib.api.DaffodilSchemaSource
import org.apache.daffodil.lib.api.URISchemaSource
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.util.Logger
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.lib.xml._

/**
Expand Down Expand Up @@ -98,10 +99,13 @@ final class Import(importNode: Node, xsd: XMLSchemaDocument, seenArg: IIMap)
None
}
case Some(ns) => {
val uri = resolver.resolveURI(ns.toString)
if (uri == null) None
val uriString = resolver.resolveURI(ns.toString)
if (uriString == null) None
else {
val res = URISchemaSource(URI.create(uri))
val uri = URI.create(uriString)
val dfp = Misc.uriToDiagnosticFile(uri)
val res =
URISchemaSource(dfp, uri)
Some(res)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

package org.apache.daffodil.core.dsom

import java.io.File
import scala.xml.NamespaceBinding

import org.apache.daffodil.core.dsom.walker.CommonContextView
import org.apache.daffodil.lib.api.DaffodilTunables
import org.apache.daffodil.lib.exceptions.SchemaFileLocatable
import org.apache.daffodil.lib.xml.NS
import org.apache.daffodil.lib.xml.XMLUtils
Expand All @@ -31,8 +31,6 @@ trait SchemaFileLocatableImpl extends SchemaFileLocatable { self: SchemaComponen
def schemaFile: Option[DFDLSchemaFile]
def optLexicalParent: Option[SchemaComponent]

def tunables: DaffodilTunables = self.tunable

/**
* Annotations can contain expressions, so we need to be able to compile them.
*
Expand Down Expand Up @@ -76,6 +74,7 @@ trait CommonContextMixin extends NestingLexicalMixin with CommonContextView {
}
final def xmlSchemaDocument: XMLSchemaDocument = optXMLSchemaDocument.get
def uriString: String = optLexicalParent.get.uriString
/**
 * The diagnostic file for this component, obtained from its lexical parent.
 * NOTE(review): relies on `optLexicalParent.get`, so it presumably fails when
 * there is no lexical parent unless a subclass overrides it - confirm.
 */
def diagnosticFile: File =
  optLexicalParent.get.diagnosticFile

def xml: scala.xml.Node

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.daffodil.core.dsom

import java.io.File

/**
* Mixin for all SchemaComponents
*/
Expand All @@ -35,4 +37,6 @@ trait SchemaComponentIncludesAndImportsMixin extends CommonContextMixin {
xmlSchemaDocument.uriString
}.toOption.getOrElse(orElseURL)

/** Takes the diagnostic file from the enclosing XML schema document. */
override def diagnosticFile: File =
  xmlSchemaDocument.diagnosticFile

}
Loading

0 comments on commit d024f17

Please sign in to comment.