diff --git a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd index 8a8441a1e2..0b6397666f 100644 --- a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd +++ b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd @@ -224,11 +224,21 @@ - - - - - + + + + + + + + + + + + + + + diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala index 8c1af3f61d..a8c97963cd 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala @@ -94,13 +94,16 @@ object Position { * behavior of normalizing CRLF to LF, and solitary CR to LF. * Defaults to true. Should only be changed in special circumstances * as not normalizing CRLFs is non-standard for XML. - * + * @param removeComments True to remove comments. This is used to keep the XML as close to the original as possible + * @param removeProcInstr True to remove processing instructions. This is used to keep the XML as close to the original as possible */ class DaffodilConstructingLoader private[xml] ( uri: URI, errorHandler: org.xml.sax.ErrorHandler, addPositionAttributes: Boolean, - normalizeCRLFtoLF: Boolean + normalizeCRLFtoLF: Boolean, + removeComments: Boolean, + removeProcInstr: Boolean ) extends ConstructingParser( { // Note: we must open the XML carefully since it might be in some non @@ -122,7 +125,14 @@ class DaffodilConstructingLoader private[xml] ( errorHandler: org.xml.sax.ErrorHandler, addPositionAttributes: Boolean = false ) = - this(uri, errorHandler, addPositionAttributes, normalizeCRLFtoLF = true) + this( + uri, + errorHandler, + addPositionAttributes, + normalizeCRLFtoLF = true, + removeComments = true, + removeProcInstr = true + ) /** * Ensures that DOCTYPES aka DTDs, if encountered, are rejected. @@ -316,19 +326,30 @@ class DaffodilConstructingLoader private[xml] ( } /** - * Drops comments + * Drops comments if removeComments is true + * + * This is optional controlled by a constructor parameter. */ override def comment(pos: Int, s: String): Comment = { - // returning null drops comments - null + if (removeComments) { + // returning null drops comments + null + } else { + super.comment(pos, s) + } } /** - * Drops processing instructions + * Drops processing instructions if removeProcInstr is false + * + * This is optional controlled by a constructor parameter. */ override def procInstr(pos: Int, target: String, txt: String) = { - // returning null drops processing instructions - null + if (removeProcInstr) { // returning null drops processing instructions + null + } else { + super.procInstr(pos, target, txt) + } } private def parseXMLPrologAttributes( diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala index 0b32d1accc..c250dfcd03 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala @@ -702,31 +702,20 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) * @param optSchemaURI Optional URI for XML schema for the XML source document. * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements. * Defaults to false. - * @return an scala.xml.Node (Element actually) which is the document element of the source. - */ - def load( - source: DaffodilSchemaSource, - optSchemaURI: Option[URI], - addPositionAttributes: Boolean = false - ): scala.xml.Node = - load(source, optSchemaURI, addPositionAttributes, normalizeCRLFtoLF = true) - - /** - * package private constructor gives access to normalizeCRLFtoLF feature. - * - * @param source The URI for the XML document which may be a XML or DFDL schema, or just XML data. - * @param optSchemaURI Optional URI for XML schema for the XML source document. - * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements. - * Defaults to false. * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true, * but some special case situations may require preservation of CRLF/CR. + * @param removeComments True to remove comments. This is used to keep the XML as close to the original as possible + * @param removeProcInstr True to remove processing instructions. This is used to keep the XML as close to the original as possible + * * @return an scala.xml.Node (Element actually) which is the document element of the source. */ - private[xml] def load( + def load( source: DaffodilSchemaSource, optSchemaURI: Option[URI], - addPositionAttributes: Boolean, - normalizeCRLFtoLF: Boolean + addPositionAttributes: Boolean = false, + normalizeCRLFtoLF: Boolean = true, + removeComments: Boolean = true, + removeProcInstr: Boolean = true ): scala.xml.Node = { // // First we invoke the validator to explicitly validate the XML against @@ -819,7 +808,9 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) source.uriForLoading, errorHandler, addPositionAttributes, - normalizeCRLFtoLF + normalizeCRLFtoLF, + removeComments, + removeProcInstr ) val res = try { diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index da67ac01bb..b08a594010 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -42,6 +42,7 @@ import org.apache.daffodil.lib.iapi.URISchemaSource import org.apache.daffodil.lib.schema.annotation.props.LookupLocation import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Misc +import org.apache.daffodil.runtime1.infoset.XMLTextInfoset import org.apache.commons.io.IOUtils import org.xml.sax.XMLReader @@ -599,6 +600,14 @@ object XMLUtils { def removeComments(e: Node): Node = { e match { + case x @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + x case Elem(prefix, label, attribs, scope, child*) => { val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) } Elem(prefix, label, attribs, scope, true, newChildren*) @@ -644,34 +653,66 @@ object XMLUtils { private def removeMixedWhitespace(ns: Node): Node = { if (!ns.isInstanceOf[Elem]) return ns - val e = ns.asInstanceOf[Elem] - val children = e.child - val noMixedChildren = - if (children.exists(_.isInstanceOf[Elem])) { - children - .filter { - case Text(data) if data.matches("""\s*""") => false - case Text(data) => - throw new Exception("Element %s contains mixed data: %s".format(e.label, data)) - case _ => true - } - .map(removeMixedWhitespace) - } else { - children.filter { - // - // So this is a bit strange, but we're dropping nodes that are Empty String. - // - // In XML we cannot tell where there is a Text("") child, from with Nil children - // - case Text("") => false // drop empty strings - case _ => true + + ns match { + // NOTE: this is specifically for the stringAsXml feature as we avoid + // making changes to any of its children except removing any surrounding + // whitespace, requiring that stringAsXml in the infoset match results exactly. + case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => { + val (elemChildren, nonElemChildren) = e.child.partition { + _.isInstanceOf[Elem] + } + if (elemChildren.length != 1) + throw new Exception("stringAsXml must contain a single child element.") + nonElemChildren.foreach { + case Text(data) if data.matches("""\s*""") => // no-op, empty text siblings are fine + case x => + throw new Exception( + "%s is some kind of mixed content not allowed as a stringAsXml child".format(x) + ) } + e.asInstanceOf[Elem].copy(child = elemChildren) } + case _ => { + val e = ns.asInstanceOf[Elem] + val children = e.child + val noMixedChildren = + if (children.exists(_.isInstanceOf[Elem])) { + children + .filter { + case Text(data) if data.matches("""\s*""") => false + case Text(data) => + throw new Exception( + "Element %s contains mixed data: %s".format(e.label, data) + ) + case _ => true + } + .map(removeMixedWhitespace) + } else { + children.filter { + // + // So this is a bit strange, but we're dropping nodes that are Empty String. + // + // In XML we cannot tell where there is a Text("") child, from with Nil children + // + case Text("") => false // drop empty strings + case _ => true + } + } + + val res = + if (noMixedChildren eq children) e + else e.copy(child = noMixedChildren) + res + } + } - val res = - if (noMixedChildren eq children) e - else e.copy(child = noMixedChildren) - res } /** @@ -700,6 +741,15 @@ object XMLUtils { ): NodeSeq = { val res = n match { + case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + e + case e @ Elem(prefix, label, attributes, scope, children*) => { val filteredScope = if (ns.length > 0) filterScope(scope, ns) else xml.TopScope @@ -973,6 +1023,15 @@ Differences were (path, expected, actual): } else if (checkPrefixes && prefixA != prefixB) { // different prefix List((zPath + "/" + labelA + "@prefix", prefixA, prefixB)) + } else if (checkPrefixes && a.scope.getURI(prefixA) != b.scope.getURI(prefixB)) { + // prefixes doesn't resolve to same namespace + List( + ( + zPath + "/" + labelA + "@prefix-namespace", + a.scope.getURI(prefixA), + b.scope.getURI(prefixB) + ) + ) } else if (checkNamespaces && mappingsA != mappingsB) { // different namespace bindings List((zPath + "/" + labelA + "@xmlns", mappingsA, mappingsB)) @@ -1055,6 +1114,28 @@ Differences were (path, expected, actual): computeTextDiff(zPath, tA, tB, maybeType, maybeFloatEpsilon, maybeDoubleEpsilon) thisDiff } + case (cA: Comment, cB: Comment) => { + val thisDiff = computeTextDiff( + zPath, + cA.toString, + cB.toString, + maybeType, + maybeFloatEpsilon, + maybeDoubleEpsilon + ) + thisDiff + } + case (pcA: PCData, pcB: PCData) => { + val thisDiff = computeTextDiff( + zPath, + pcA.toString, + pcB.toString, + maybeType, + maybeFloatEpsilon, + maybeDoubleEpsilon + ) + thisDiff + } case (pA: ProcInstr, pB: ProcInstr) => { val ProcInstr(tA1label, tA1content) = pA val ProcInstr(tB1label, tB1content) = pB diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala index 4abe32f378..e30534fcc7 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala @@ -19,9 +19,7 @@ package org.apache.daffodil.runtime1.infoset import scala.collection.mutable.ListBuffer import scala.xml.MetaData -import scala.xml.NamespaceBinding import scala.xml.Null -import scala.xml.PrefixedAttribute import scala.xml.UnprefixedAttribute import org.apache.daffodil.api.DFDLPrimType @@ -56,16 +54,6 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) resultNode = Maybe(root(0)) } - private def getScope(diElem: DIElement): NamespaceBinding = { - val minScope = diElem.metadata.minimizedScope - // if including xsi:type is enabled, ensure the xsi namespace is defined on the root element - if (getIncludeDataType() && stack.length == 1 && minScope.getURI("xsi") == null) { - NamespaceBinding("xsi", XMLUtils.XSI_NAMESPACE, minScope) - } else { - minScope - } - } - private def getAttributes(diElem: DIElement): MetaData = { val nilAttr = if (diElem.isNilled) XMLUtils.xmlNilAttribute else Null val freedAttr = @@ -92,14 +80,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) } else { nilAttr } - val typedAttr = - if (getIncludeDataType() && diElem.isSimple) { - val primName = diElem.erd.optPrimType.get.name - new PrefixedAttribute("xsi", "type", "xs:" + primName, freedAttr) - } else { - freedAttr - } - typedAttr + freedAttr } override def startSimple(se: InfosetSimpleElement): Unit = { @@ -124,7 +105,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) diSimple.metadata.prefix, diSimple.metadata.name, attributes, - getScope(diSimple), + diSimple.metadata.minimizedScope, minimizeEmpty = true, children* ) @@ -149,7 +130,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) diComplex.metadata.prefix, diComplex.metadata.name, attributes, - getScope(diComplex), + diComplex.metadata.minimizedScope, minimizeEmpty = true, children* ) diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala index 3cb5caeb3e..1471da3a3d 100644 --- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala +++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala @@ -172,9 +172,19 @@ class TestXMLLoader { // and toString will print them out into the text with the preserved. // val xmlFromDafLoaderNonNormalized = - loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = false) + loader.load( + ss, + None, + addPositionAttributes = false, + normalizeCRLFtoLF = false + ) val xmlFromDafLoaderNormalized = - loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = true) + loader.load( + ss, + None, + addPositionAttributes = false, + normalizeCRLFtoLF = true + ) { // compare to the regular scala XML loader diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala index 3e70d7129d..2c9690f4b1 100644 --- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala +++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala @@ -98,6 +98,17 @@ class TestXMLUtils { assertEquals("ns2", b) } + @Test def testPrefixNSDiff(): Unit = { + // different prefix should error, even though the namespace is the same + val d1 = a + val d2 = a + val diffs = XMLUtils.computeDiff(d1, d2, checkPrefixes = true) + val Seq((path, a, b)) = diffs + assertEquals("/a@prefix-namespace", path) + assertEquals("someprefix", a) + assertEquals("someotherprefix", b) + } + @Test def testNamespaceDiff(): Unit = { // different namespace mappings should error val d1 = a diff --git a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala index 0549c61ac1..f36a2a4909 100644 --- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala +++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala @@ -2800,7 +2800,13 @@ case class DFDLInfoset(di: Node, parent: Infoset) { val testSuite = testCase.parent val before = testSuite.loadingExceptions.clone() - val elem = loader.load(infosetSrc, None) // no schema + val elem = loader.load( + infosetSrc, + None, + normalizeCRLFtoLF = false, + removeComments = false, + removeProcInstr = false + ) // no schema // // TODO: DAFFODIL-288 validate the infoset also // You can pass the optDataSchema, which appears to be the correct thing diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala index 696a1ab8e8..da0d134a67 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala @@ -41,6 +41,7 @@ import org.apache.daffodil.lib.util.MaybeULong import org.apache.daffodil.lib.xml.DaffodilSAXParserFactory import org.apache.daffodil.lib.xml.XMLUtils import org.apache.daffodil.lib.xml.XMLUtils.XMLDifferenceException +import org.apache.daffodil.processor.tdml import org.apache.daffodil.runtime1.iapi.* import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnhandledSAXException import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnparseContentHandler @@ -173,7 +174,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( private def blobPrefix = "" private def blobSuffix = ".bin" - private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "scala") + private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "xml") override def withTracing(bool: Boolean): DaffodilTDMLDFDLProcessor = { copy(dp = newTracing(bool)) @@ -269,7 +270,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( val outputter = if (tdmlApiInfosetsEnv == "all") { TDMLInfosetOutputterAll() } else { - TDMLInfosetOutputterScala() + TDMLInfosetOutputterXML() } outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix) @@ -308,7 +309,17 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( xri.parse(sis) if (!actual.isError && !errorHandler.isError) { - verifySameParseOutput(outputter.xmlStream, saxOutputStream) + // we use the scala result because both the ScalaInfosetOutputter and + // the SAXInfosetOutputter do not implement stringAsXml, + // which helps to avoid any differences cause by the stringAsXml conversions. + val actualOutputArray = outputter + .asInstanceOf[tdml.TDMLInfosetOutputterAll] + .getScalaResult + .toString + .getBytes("UTF-8") + val baos = new ByteArrayOutputStream(actualOutputArray.length) + baos.write(actualOutputArray) + verifySameParseOutput(baos, saxOutputStream) } val dpParseDiag = actual.getDiagnostics.asScala.map(_.toString()).toSeq val saxParseDiag = errorHandler.getDiagnostics.asScala.map(_.toString()).toSeq @@ -392,7 +403,12 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( XMLUtils.compareAndReport( dpParseXMLNodeOutput, saxParseXMLNodeOutput, - checkNamespaces = true, + // we no longer checkNamespaces because SAX outputs the same namespaces as + // the XMLTextInfosetOutputter but not the scalaXMLInfosetOutputter, so checking + // namespaces fails in the DAFFODIL_TDML_API_INFOSETS='all' case due to differences + // in the scalaXMLInfosetOutputter namespaces, probably having to do with + // minimizeScope issues + // checkNamespaces = true, checkPrefixes = true ) } catch { diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala index 518f38961b..e9097b4413 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala @@ -27,32 +27,37 @@ import org.apache.daffodil.lib.util.Misc import org.apache.daffodil.lib.xml.XMLUtils import org.apache.daffodil.runtime1.dpath.NodeInfo import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter -import org.apache.daffodil.runtime1.infoset.ScalaXMLInfosetInputter import org.apache.daffodil.tdml.TDMLException class TDMLInfosetInputter( - val scalaInputter: ScalaXMLInfosetInputter, + val inputter: api.infoset.InfosetInputter, others: Seq[api.infoset.InfosetInputter] ) extends api.infoset.InfosetInputter { private def implString: String = "daffodil" override def getEventType(): InfosetInputterEventType = { - val res = scalaInputter.getEventType() + val res = inputter.getEventType() if (!others.forall(_.getEventType() == res)) - throw TDMLException("getEventType does not match", Some(implString)) + throw TDMLException( + s"getEventType does not match\n${others.zip(others.map(_.getEventType)).mkString("\n")}", + Some(implString) + ) res } override def getLocalName(): String = { - val res = scalaInputter.getLocalName() + val res = inputter.getLocalName() if (!others.forall(_.getLocalName() == res)) - throw TDMLException("getLocalName does not match", Some(implString)) + throw TDMLException( + s"getLocalName does not match\n${others.zip(others.map(_.getLocalName)).mkString("\n")}", + Some(implString) + ) res } override def getNamespaceURI(): String = { - val res = scalaInputter.getNamespaceURI() + val res = inputter.getNamespaceURI() val resIsEmpty = res == null || res == "" val othersMatch = others.forall { i => if (!i.getSupportsNamespaces) { @@ -66,7 +71,10 @@ class TDMLInfosetInputter( } } if (!othersMatch) - throw TDMLException("getNamespaceURI does not match", Some(implString)) + throw TDMLException( + s"getNamespaceURI does not match\n${others.filter(_.getSupportsNamespaces).map(o => (o, o.getNamespaceURI)).mkString("\n")}", + Some(implString) + ) res } @@ -74,7 +82,7 @@ class TDMLInfosetInputter( primType: NodeInfo.Kind, runtimeProperties: java.util.Map[String, String] ): String = { - val res = scalaInputter.getSimpleText(primType, runtimeProperties) + val res = inputter.getSimpleText(primType, runtimeProperties) val resIsEmpty = res == null || res == "" val otherStrings = others.map { i => // Note in an unparserTestCase, there are no others (infoset inputters), because the input infoset is @@ -100,7 +108,10 @@ class TDMLInfosetInputter( } if (!othersmatch) - throw TDMLException("getSimpleText does not match", Some(implString)) + throw TDMLException( + s"getSimpleText does not match for $res\n${others.zip(otherStrings).mkString("\n")}", + Some(implString) + ) if (primType.isInstanceOf[NodeInfo.AnyURI.Kind]) { try { @@ -126,26 +137,32 @@ class TDMLInfosetInputter( } override def isNilled(): JBoolean = { - val res = scalaInputter.isNilled() + val res = inputter.isNilled() if (!others.forall(_.isNilled() == res)) - throw TDMLException("isNilled does not match", Some(implString)) + throw TDMLException( + s"isNilled does not match\n${others.zip(others.map(_.isNilled)).mkString("\n")}", + Some(implString) + ) res } override def hasNext(): Boolean = { - val res = scalaInputter.hasNext() + val res = inputter.hasNext() if (!others.forall(_.hasNext() == res)) - throw TDMLException("hasNext does not match", Some(implString)) + throw TDMLException( + s"hasNext does not match\n${others.zip(others.map(_.hasNext)).mkString("\n")}", + Some(implString) + ) res } override def next(): Unit = { - scalaInputter.next() + inputter.next() others.foreach(_.next()) } override def fini(): Unit = { - scalaInputter.fini() + inputter.fini() others.foreach(_.fini()) } diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala index cf913d6877..a04262955f 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala @@ -19,7 +19,6 @@ package org.apache.daffodil.processor.tdml import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream -import java.nio.charset.Charset import scala.xml.Node import org.apache.daffodil.api @@ -36,29 +35,27 @@ import org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter -class TDMLInfosetOutputterScala(scalaOut: ScalaXMLInfosetOutputter) - extends TeeInfosetOutputter(Seq(scalaOut)*) - with TDMLInfosetOutputter { +object TDMLInfosetOutputterXML { + def apply(): TDMLInfosetOutputterXML = { + val baos = new ByteArrayOutputStream() + val xmlOut = new XMLTextInfosetOutputter(baos, false) + xmlOut.setIncludeDataType(true) + new TDMLInfosetOutputterXML(baos, xmlOut) + } +} - override def getResult: Node = scalaOut.getResult() +class TDMLInfosetOutputterXML( + override val xmlStream: ByteArrayOutputStream, + xmlOut: XMLTextInfosetOutputter +) extends TeeInfosetOutputter(Seq(xmlOut)*) + with TDMLInfosetOutputter { - override lazy val xmlStream: ByteArrayOutputStream = { - val bos = new ByteArrayOutputStream() - bos.write(getResult.toString().getBytes(Charset.defaultCharset())) - bos - } + override def getResult: Node = + scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) override def toInfosetInputter: TDMLInfosetInputter = { - val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult()) - new TDMLInfosetInputter(scalaIn, Seq()) - } -} - -object TDMLInfosetOutputterScala { - def apply(): TDMLInfosetOutputterScala = { - val scalaOut = new ScalaXMLInfosetOutputter() - scalaOut.setIncludeDataType(true) - new TDMLInfosetOutputterScala(scalaOut) + val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) + new TDMLInfosetInputter(xmlIn, Seq()) } } @@ -73,7 +70,9 @@ class TDMLInfosetOutputterAll( ) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)*) with TDMLInfosetOutputter { - override def getResult: Node = scalaOut.getResult() + def getScalaResult: Node = scalaOut.getResult() + override def getResult: Node = + scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) override def toInfosetInputter: TDMLInfosetInputter = { val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult()) @@ -82,10 +81,14 @@ class TDMLInfosetOutputterAll( val jsonIn = new JsonInfosetInputter(new ByteArrayInputStream(jsonStream.toByteArray)) val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) val nullIn = { - val events = NullInfosetInputter.toEvents(new ByteArrayInputStream(xmlStream.toByteArray)) + val events = NullInfosetInputter.toEvents( + new ByteArrayInputStream( + scalaOut.getResult().toString().getBytes("UTF-8") + ) + ) new NullInfosetInputter(events) } - new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn, nullIn)) + new TDMLInfosetInputter(xmlIn, Seq(jdomIn, w3cdomIn, jsonIn, scalaIn, nullIn)) } } @@ -100,7 +103,7 @@ object TDMLInfosetOutputterAll { val jsonOut = new JsonInfosetOutputter(jsonStream, false) val xmlOut = new XMLTextInfosetOutputter(xmlStream, false) - Seq(scalaOut, jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out => + Seq(jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out => out.setIncludeDataType(true) } diff --git a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala index 1f0e062f32..046fbe6fb2 100644 --- a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala +++ b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala @@ -45,7 +45,7 @@ class TestCLITdml { "daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml" ) - val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "scala") + val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "xml") runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli => // parse diff --git a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory new file mode 100644 index 0000000000..a12f49a78a --- /dev/null +++ b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.daffodil.infoset.TestStringAsXmlValidatorFactory diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml new file mode 100644 index 0000000000..7d71f77ef9 --- /dev/null +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml @@ -0,0 +1,86 @@ + + + + + + + + stringAsXml/namespaced/binMessage_01.dat + + + stringAsXml/namespaced/binMessage_01.dat.xml + + + + + + stringAsXml/namespaced/binMessage_01.dat + + + stringAsXml/namespaced/binMessage_01.dat.xml + + + Element 'xmlStr' is a simple type + + + + + + stringAsXml/namespaced/binMessage_03.dat + + + Unexpected character + + + + + + stringAsXml/namespaced/binMessage_08.dat + + + Undeclared general entity "name" + + + + + + + stringAsXml/nonamespace/binMessage_01.dat + + + stringAsXml/nonamespace/binMessage_01.dat.xml + + + Value '=invalid field' is not facet-valid + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml index 6d636c6af1..545f47029f 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml @@ -48,7 +48,7 @@ - 42 + 42 diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala new file mode 100644 index 0000000000..62f9392338 --- /dev/null +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.apache.daffodil.junit.tdml.TdmlSuite +import org.apache.daffodil.junit.tdml.TdmlTests + +import org.junit.Test + +object TestStringAsXmlTDML extends TdmlSuite { + val tdmlResource = "/org/apache/daffodil/infoset/stringAsXML.tdml" +} + +class TestStringAsXmlTDML extends TdmlTests { + val tdmlSuite = TestStringAsXmlTDML + + @Test def stringAsXml_01_a = test + @Test def stringAsXml_01_b = test + @Test def stringAsXml_04 = test + @Test def stringAsXml_09 = test + @Test def stringAsXml_10 = test +} diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala new file mode 100644 index 0000000000..560329ad87 --- /dev/null +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.infoset + +import java.io.InputStream +import java.net.URL +import java.util.Properties + +import org.apache.daffodil.api.validation.ValidationHandler +import org.apache.daffodil.api.validation.Validator +import org.apache.daffodil.api.validation.ValidatorFactory +import org.apache.daffodil.validation.XercesValidator + +object TestStringAsXmlValidator { + val name = "TestStringAsXmlValidator" +} + +class TestStringAsXmlValidator(schemaURL: String) extends Validator { + private lazy val xercesValidator = XercesValidator.fromURL(new URL(schemaURL)) + + override def validateXML(document: InputStream, vh: ValidationHandler): Unit = { + xercesValidator.validateXML(document, vh) + } +} + +class TestStringAsXmlValidatorFactory extends ValidatorFactory { + + override def name: String = TestStringAsXmlValidator.name + + override def make(config: Properties) = { + val dfdlSchema = config.getProperty(name) + // assumes the validation XSD path is in the same as the DFDL schema but with a different suffix + val xsdSchema = dfdlSchema.replace(".dfdl.xsd", "WithXmlPayload.xsd") + new TestStringAsXmlValidator(xsdSchema) + } +}