From cdc9b819caffbc6b502d45f8933f5fb19e5b9962 Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Wed, 9 Oct 2024 22:15:09 -0400 Subject: [PATCH] Fix Resolve Schema Location for xsi:schemaLocation in Config files - add comment about specific validation error from Xerces that requires workaround - fix resolveSchemaLocation so it looks up non-jar uris with a scheme - add schema location to config to prove it doesn't cause failure - use new File() instead of Paths.get to avoid issue with drive names in Windows DAFFODIL-2339 --- .../daffodil/lib/xml/DaffodilXMLLoader.scala | 3 +- .../apache/daffodil/lib/xml/XMLUtils.scala | 31 +++++++------------ .../daffodil_config_cli_test.xml | 24 +++----------- 3 files changed, 18 insertions(+), 40 deletions(-) diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala index 68f7645f83..ded5498447 100644 --- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala +++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala @@ -698,7 +698,8 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) // We must use XMLReader setProperty() function to set the entity resolver--calling // setEntityResolver with the Xerces XML reader causes validation to fail for some - // reason. We call the right function below, but unfortunately, scala-xml calls + // reason (we get a "cvc-elt.1.a: Cannot find the declaration of element 'schema'" error). + // We call the right function below, but unfortunately, scala-xml calls // setEntityResolver in loadDocument(), which cannot be disabled and scala-xml does not // want to change. To avoid this, we wrap the Xerces XMLReader in an XMLFilterImpl and // override setEntityResolver to a no-op. 
However, XMLFilterImpl parse() calls diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index dc41c6504f..83aa1e61f3 100644 --- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -1459,34 +1459,27 @@ Differences were (path, expected, actual): ) } - val uriIsJustPathComponent = - uri.getScheme == null && - uri.getAuthority == null && - uri.getQuery == null && - uri.getFragment == null && - uri.getPath != null - val optResolved: Option[(URISchemaSource, Boolean)] = - if (uri.isAbsolute) { - // an absolute URI is one with a scheme. In this case, we expect to be able to resolve - // the URI and do not try anything else (e.g. filesystem, classpath). Since this function + if (uri.isAbsolute && uri.getScheme.contains("jar")) { + // an absolute URI is one with a scheme. In the case that it is a jar uri + // we expect to be able to resolve the URI and do not try anything else + // (e.g. filesystem, classpath). Since this function // is for schemaLocation attributes, we may eventually want to disallow this, and only // allow relative URIs (i.e. URIs without a scheme). We do have some places that use // absolute URIs in includes/imports and cannot remove this yet. try { - uri.toURL.openStream.close + uri.toURL.openStream.close() val uss = URISchemaSource(Misc.uriToDiagnosticFile(uri), uri) Some(uss, false) } catch { case e: IOException => None } - } else if (!uriIsJustPathComponent) { - // this is not an absolute URI so we don't have a scheme. 
This should just be a path, so - // throw an IllegalArgumentException if that's not the case - val msg = - s"Non-absolute schemaLocation URI can only contain a path component: $schemaLocation" - throw new IllegalArgumentException(msg) - } else if (uri.getPath.startsWith("/")) { + } + // we want to attempt to resolve the URI whether the non-jar uri has a scheme or not, + // this is relevant for when we are validating with Xerces, and it calls resolveEntity + // we get URIs that look like file:/path/to/not/absolute/path ex: file:/org/apache/daffodil/xsd/dafext.xsd + // that fail to be found in the above case, so we have to look them up + else if (uri.getPath.startsWith("/")) { // The None.orElse{ ... }.orElse { ... } pattern below is useful to evaluate each // alternative way to resolve a schema location, stopping only when a Some is returned. // This makes for easily adding/removing/reordering resolution approaches by changing @@ -1506,7 +1499,7 @@ Differences were (path, expected, actual): .orElse { // Search for the schemaLocation path on the file system. This path is absolute so it // must exist. If it does not exist, this orElse block results in a None - val file = Paths.get(uri.getPath).toFile + val file = new File(uri.getPath) if (file.exists) { val uss = URISchemaSource(file, file.toURI) Some((uss, false)) diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/external_variables/daffodil_config_cli_test.xml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/external_variables/daffodil_config_cli_test.xml index f714d0e653..262111c287 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/external_variables/daffodil_config_cli_test.xml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/external_variables/daffodil_config_cli_test.xml @@ -15,26 +15,10 @@ See the License for the specific language governing permissions and limitations under the License. --> - - - + -9