diff --git a/exist-core/pom.xml b/exist-core/pom.xml index 991c80178de..7c739d20914 100644 --- a/exist-core/pom.xml +++ b/exist-core/pom.xml @@ -1200,6 +1200,7 @@ The BaseX Team. The original license statement is also included below.]]>${project.build.testOutputDirectory}/log4j2.xml + 180 + + + org.exist.storage.lock.DeadlockIT + org.exist.xmldb.RemoveCollectionIT + @{jacocoArgLine} --add-modules jdk.incubator.vector --enable-native-access=ALL-UNNAMED -Dfile.encoding=${project.build.sourceEncoding} -Dexist.recovery.progressbar.hide=true ${project.basedir}/../exist-jetty-config/target/classes/org/exist/jetty diff --git a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java index ca85a06f5fe..7c727e6ab16 100644 --- a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java +++ b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java @@ -28,6 +28,11 @@ public class EXistOutputKeys { */ public static final String ITEM_SEPARATOR = "item-separator"; + // --- QT4 Serialization 4.0 parameters --- + public static final String CANONICAL = "canonical"; + public static final String ESCAPE_SOLIDUS = "escape-solidus"; + public static final String JSON_LINES = "json-lines"; + public static final String OMIT_ORIGINAL_XML_DECLARATION = "omit-original-xml-declaration"; public static final String OUTPUT_DOCTYPE = "output-doctype"; diff --git a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java index 758ccee130a..a1b7c9890b3 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java @@ -81,13 +81,27 @@ protected SerializerWriter getDefaultWriter() { public void setOutput(Writer writer, Properties properties) { outputProperties = 
Objects.requireNonNullElseGet(properties, () -> new Properties(defaultProperties)); final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml"); - final String htmlVersionProp = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION, "1.0"); - + // For html/xhtml methods, determine HTML version: + // 1. Use html-version if explicitly set + // 2. Otherwise use version (W3C spec: version controls HTML version for html method) + // 3. Default to 5.0 double htmlVersion; - try { - htmlVersion = Double.parseDouble(htmlVersionProp); - } catch (NumberFormatException e) { - htmlVersion = 1.0; + final String explicitHtmlVersion = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION); + if (explicitHtmlVersion != null) { + try { + htmlVersion = Double.parseDouble(explicitHtmlVersion); + } catch (NumberFormatException e) { + htmlVersion = 5.0; + } + } else if (("html".equalsIgnoreCase(method) || "xhtml".equalsIgnoreCase(method)) + && outputProperties.getProperty(OutputKeys.VERSION) != null) { + try { + htmlVersion = Double.parseDouble(outputProperties.getProperty(OutputKeys.VERSION)); + } catch (NumberFormatException e) { + htmlVersion = 5.0; + } + } else { + htmlVersion = 5.0; } final SerializerWriter baseSerializerWriter = getBaseSerializerWriter(method, htmlVersion); diff --git a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java index 22ab6dfca23..717ec83ab07 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java @@ -190,10 +190,15 @@ private void writeAtomic(AtomicValue value) throws IOException, SAXException, XP } private void writeDouble(final DoubleValue item) throws SAXException { - final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US); - symbols.setExponentSeparator("e"); - final DecimalFormat df = new 
DecimalFormat("0.0##########################E0", symbols); - writeText(df.format(item.getDouble())); + final double d = item.getDouble(); + if (Double.isInfinite(d) || Double.isNaN(d)) { + writeText(item.getStringValue()); + } else { + final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US); + symbols.setExponentSeparator("e"); + final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols); + writeText(df.format(d)); + } } private void writeArray(final ArrayType array) throws XPathException, SAXException, TransformerException { @@ -215,9 +220,7 @@ private void writeArray(final ArrayType array) throws XPathException, SAXExcepti private void writeMap(final AbstractMapType map) throws SAXException, XPathException, TransformerException { try { - writer.write("map"); - addSpaceIfIndent(); - writer.write('{'); + writer.write("map{"); addIndent(); indent(); for (final Iterator> i = map.iterator(); i.hasNext(); ) { diff --git a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java index 1dffc3029b7..bc69c4304c6 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java @@ -246,6 +246,23 @@ protected void closeStartTag(boolean isEmpty) throws TransformerException { } } + @Override + public void processingInstruction(String target, String data) throws TransformerException { + try { + closeStartTag(false); + final Writer writer = getWriter(); + writer.write("'); + } catch (IOException e) { + throw new TransformerException(e.getMessage(), e); + } + } + @Override protected boolean needsEscape(char ch) { if (RAW_TEXT_ELEMENTS.contains(currentTag)) { @@ -253,4 +270,20 @@ protected boolean needsEscape(char ch) { } return super.needsEscape(ch); } + + @Override + protected boolean needsEscape(final char ch, final boolean inAttribute) { + // In raw text 
elements (script, style), suppress escaping for TEXT content only. + // Attribute values must always be escaped, even on raw text elements. + if (!inAttribute && RAW_TEXT_ELEMENTS.contains(currentTag)) { + return false; + } + // For attributes, always return true (bypass the 1-arg override + // which returns false for all script/style content) + if (inAttribute) { + return true; + } + return super.needsEscape(ch, inAttribute); + } + } diff --git a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java index c336d8b2943..99df54c3e19 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java @@ -25,7 +25,9 @@ import java.io.Writer; import java.util.ArrayDeque; import java.util.Deque; +import java.util.HashSet; import java.util.Properties; +import java.util.Set; import javax.xml.transform.OutputKeys; import javax.xml.transform.TransformerException; @@ -48,6 +50,8 @@ public class IndentingXMLWriter extends XMLWriter { private boolean sameline = false; private boolean whitespacePreserve = false; private final Deque whitespacePreserveStack = new ArrayDeque<>(); + private Set suppressIndentation = null; + private int suppressIndentDepth = 0; public IndentingXMLWriter() { super(); @@ -75,6 +79,9 @@ public void startElement(final String namespaceURI, final String localName, fina indent(); } super.startElement(namespaceURI, localName, qname); + if (isSuppressIndentation(localName)) { + suppressIndentDepth++; + } addIndent(); afterTag = true; sameline = true; @@ -86,6 +93,9 @@ public void startElement(final QName qname) throws TransformerException { indent(); } super.startElement(qname); + if (isSuppressIndentation(qname.getLocalPart())) { + suppressIndentDepth++; + } addIndent(); afterTag = true; sameline = true; @@ -95,6 +105,9 @@ public void startElement(final QName 
qname) throws TransformerException { public void endElement(final String namespaceURI, final String localName, final String qname) throws TransformerException { endIndent(namespaceURI, localName); super.endElement(namespaceURI, localName, qname); + if (isSuppressIndentation(localName) && suppressIndentDepth > 0) { + suppressIndentDepth--; + } popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element sameline = isInlineTag(namespaceURI, localName); afterTag = true; @@ -104,6 +117,9 @@ public void endElement(final String namespaceURI, final String localName, final public void endElement(final QName qname) throws TransformerException { endIndent(qname.getNamespaceURI(), qname.getLocalPart()); super.endElement(qname); + if (isSuppressIndentation(qname.getLocalPart()) && suppressIndentDepth > 0) { + suppressIndentDepth--; + } popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element sameline = isInlineTag(qname.getNamespaceURI(), qname.getLocalPart()); afterTag = true; @@ -164,7 +180,29 @@ public void setOutputProperties(final Properties properties) { } catch (final NumberFormatException e) { LOG.warn("Invalid indentation value: '{}'", option); } - indent = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no")); + final String indentValue = outputProperties.getProperty(OutputKeys.INDENT, "no").trim(); + indent = "yes".equals(indentValue) || "true".equals(indentValue) || "1".equals(indentValue); + final String suppressProp = outputProperties.getProperty("suppress-indentation"); + if (suppressProp != null && !suppressProp.isEmpty()) { + suppressIndentation = new HashSet<>(); + for (final String name : suppressProp.split("\\s+")) { + if (!name.isEmpty()) { + // Handle URI-qualified names: Q{ns}local or {ns}local → extract local part + if (name.startsWith("Q{") || name.startsWith("{")) { + final int closeBrace = name.indexOf('}'); + if (closeBrace > 0 && closeBrace < name.length() - 1) { + 
suppressIndentation.add(name.substring(closeBrace + 1)); + } else { + suppressIndentation.add(name); + } + } else { + suppressIndentation.add(name); + } + } + } + } else { + suppressIndentation = null; + } } @Override @@ -220,8 +258,12 @@ protected void addSpaceIfIndent() throws IOException { writer.write(' '); } + private boolean isSuppressIndentation(final String localName) { + return suppressIndentation != null && suppressIndentation.contains(localName); + } + protected void indent() throws TransformerException { - if (!indent || whitespacePreserve) { + if (!indent || whitespacePreserve || suppressIndentDepth > 0) { return; } final int spaces = indentAmount * level; diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java index e89e7119d19..4894c0162af 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java @@ -24,6 +24,7 @@ import java.io.Writer; import javax.xml.transform.TransformerException; +import org.exist.storage.serializers.EXistOutputKeys; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; import it.unimi.dsi.fastutil.objects.ObjectSet; @@ -128,7 +129,45 @@ protected void writeDoctype(String rootElement) throws TransformerException { return; } - documentType("html", null, null); + // Canonical serialization: never output DOCTYPE + final String canonicalProp = outputProperties != null + ? outputProperties.getProperty(EXistOutputKeys.CANONICAL) : null; + if ("yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp)) { + doctypeWritten = true; + return; + } + + // Only output DOCTYPE when the root element is (case-insensitive) + // Per W3C Serialization: DOCTYPE is for the html element only, not fragments + final String localName = rootElement.contains(":") ? 
rootElement.substring(rootElement.indexOf(':') + 1) : rootElement; + if (!"html".equalsIgnoreCase(localName)) { + doctypeWritten = true; // suppress future attempts + return; + } + + final String publicId = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_PUBLIC) : null; + final String systemId = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_SYSTEM) : null; + final String method = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD, "xhtml") : "xhtml"; + + if ("xhtml".equalsIgnoreCase(method)) { + // XHTML: per W3C spec section 5.2, only output doctype-public when + // doctype-system is also present + if (systemId != null) { + documentType("html", publicId, systemId); + } else if (publicId == null) { + // Neither set — simple DOCTYPE + documentType("html", null, null); + } else { + // doctype-public without doctype-system — suppress DOCTYPE for XHTML + doctypeWritten = true; + } + } else { + // HTML method: pass through doctype-public and doctype-system as set + documentType("html", publicId, systemId); + } doctypeWritten = true; } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java index b0006f7f51c..9238cd1e848 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.Writer; +import javax.xml.transform.OutputKeys; import javax.xml.transform.TransformerException; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; @@ -36,12 +37,35 @@ */ public class XHTMLWriter extends IndentingXMLWriter { + /** + * HTML boolean attributes per HTML 4.01 and HTML5 spec. 
+ * When method="html" and the attribute value equals the attribute name + * (case-insensitive), the attribute is minimized to just the name. + */ + protected static final ObjectSet BOOLEAN_ATTRIBUTES = new ObjectOpenHashSet<>(31); + static { + BOOLEAN_ATTRIBUTES.add("checked"); + BOOLEAN_ATTRIBUTES.add("compact"); + BOOLEAN_ATTRIBUTES.add("declare"); + BOOLEAN_ATTRIBUTES.add("defer"); + BOOLEAN_ATTRIBUTES.add("disabled"); + BOOLEAN_ATTRIBUTES.add("ismap"); + BOOLEAN_ATTRIBUTES.add("multiple"); + BOOLEAN_ATTRIBUTES.add("nohref"); + BOOLEAN_ATTRIBUTES.add("noresize"); + BOOLEAN_ATTRIBUTES.add("noshade"); + BOOLEAN_ATTRIBUTES.add("nowrap"); + BOOLEAN_ATTRIBUTES.add("readonly"); + BOOLEAN_ATTRIBUTES.add("selected"); + } + protected static final ObjectSet EMPTY_TAGS = new ObjectOpenHashSet<>(31); static { EMPTY_TAGS.add("area"); EMPTY_TAGS.add("base"); EMPTY_TAGS.add("br"); EMPTY_TAGS.add("col"); + EMPTY_TAGS.add("embed"); EMPTY_TAGS.add("hr"); EMPTY_TAGS.add("img"); EMPTY_TAGS.add("input"); @@ -88,6 +112,8 @@ public class XHTMLWriter extends IndentingXMLWriter { } protected String currentTag; + protected boolean inHead = false; + protected boolean contentTypeMetaWritten = false; protected final ObjectSet emptyTags; protected final ObjectSet inlineTags; @@ -120,78 +146,121 @@ public XHTMLWriter(final Writer writer, ObjectSet emptyTags, ObjectSet 0 && namespaceURI != null && namespaceURI.equals(Namespaces.XHTML_NS)) { - haveCollapsedXhtmlPrefix = true; - return qname.substring(pos+1); - + if (pos > 0 && namespaceURI != null) { + if (namespaceURI.equals(Namespaces.XHTML_NS)) { + haveCollapsedXhtmlPrefix = true; + return qname.substring(pos + 1); + } + // XHTML5: normalize SVG and MathML prefixes + if (isHtml5Version() && (namespaceURI.equals(SVG_NS) || namespaceURI.equals(MATHML_NS))) { + collapsedForeignNs = namespaceURI; + return qname.substring(pos + 1); + } } - return qname; } @Override public void namespace(final String prefix, final String nsURI) throws 
TransformerException { - if(haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) { - return; //dont output the xmlns:prefix for the collapsed nodes prefix + if (haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) { + return; // don't output the xmlns:prefix for the collapsed node's prefix + } + // When a foreign namespace prefix was collapsed, replace the prefixed + // declaration with a default namespace declaration + if (collapsedForeignNs != null && prefix != null && !prefix.isEmpty() + && nsURI.equals(collapsedForeignNs)) { + super.namespace("", nsURI); // emit xmlns="..." instead of xmlns:prefix="..." + return; } - super.namespace(prefix, nsURI); } @@ -200,9 +269,25 @@ public void namespace(final String prefix, final String nsURI) throws Transforme protected void closeStartTag(final boolean isEmpty) throws TransformerException { try { if (tagIsOpen) { + // Flush canonical buffers (sorted namespaces + attributes) if active + if (isCanonical()) { + flushCanonicalBuffersXhtml(); + } if (isEmpty) { - if (isEmptyTag(currentTag)) { - getWriter().write(" />"); + if (isCanonical()) { + // Canonical: always expand empty elements + getWriter().write('>'); + getWriter().write("'); + } else if (isEmptyTag(currentTag)) { + // For method="html", use HTML-style void tags (
) + // For method="xhtml", use XHTML-style (
) + if (isHtmlMethod()) { + getWriter().write(">"); + } else { + getWriter().write(" />"); + } } else { getWriter().write('>'); getWriter().write(") while XHTML uses self-closing (
). + */ + private boolean isHtmlMethod() { + if (outputProperties != null) { + final String method = outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD); + return "html".equalsIgnoreCase(method); + } + return false; + } + + /** + * Returns true if the HTML version is 5.0 or higher. + */ + private boolean isHtml5Version() { + if (outputProperties == null) { + return true; // default to HTML5 + } + final String version = outputProperties.getProperty(OutputKeys.VERSION); + if (version != null) { + try { + return Double.parseDouble(version) >= 5.0; + } catch (final NumberFormatException e) { + // ignore + } + } + return true; // default to HTML5 + } + @Override + public void attribute(final QName qname, final CharSequence value) throws TransformerException { + // For method="html", minimize boolean attributes when value matches name + if (isHtmlMethod() && isBooleanAttribute(qname.getLocalPart(), value)) { + try { + if (!tagIsOpen) { + characters(value); + return; + } + final Writer w = getWriter(); + w.write(' '); + w.write(qname.getLocalPart()); + // Don't write ="value" — minimized form + } catch (final IOException ioe) { + throw new TransformerException(ioe.getMessage(), ioe); + } + return; + } + super.attribute(qname, value); + } + + @Override + public void attribute(final String qname, final CharSequence value) throws TransformerException { + if (isHtmlMethod() && isBooleanAttribute(qname, value)) { + try { + if (!tagIsOpen) { + characters(value); + return; + } + final Writer w = getWriter(); + w.write(' '); + w.write(qname); + } catch (final IOException ioe) { + throw new TransformerException(ioe.getMessage(), ioe); + } + return; + } + super.attribute(qname, value); + } + + private boolean isBooleanAttribute(final String attrName, final CharSequence value) { + return BOOLEAN_ATTRIBUTES.contains(attrName.toLowerCase(java.util.Locale.ROOT)) + && attrName.equalsIgnoreCase(value.toString()); + } + + private static final ObjectSet 
RAW_TEXT_ELEMENTS_HTML = new ObjectOpenHashSet<>(4); + static { + RAW_TEXT_ELEMENTS_HTML.add("script"); + RAW_TEXT_ELEMENTS_HTML.add("style"); + } + + @Override + protected boolean needsEscape(final char ch, final boolean inAttribute) { + // For HTML method, script and style content should not be escaped + if (!inAttribute && isHtmlMethod() + && currentTag != null && RAW_TEXT_ELEMENTS_HTML.contains(currentTag.toLowerCase(java.util.Locale.ROOT))) { + return false; + } + return super.needsEscape(ch, inAttribute); + } + + /** + * For HTML serialization, cdata-section-elements is ignored per the + * W3C serialization spec — CDATA sections are not valid in HTML. + */ + @Override + protected boolean shouldUseCdataSections() { + if (isHtmlMethod()) { + return false; + } + return super.shouldUseCdataSections(); + } + + @Override + protected boolean escapeAmpersandBeforeBrace() { + // HTML spec: & before { in attribute values should not be escaped + return false; + } + @Override protected boolean isInlineTag(final String namespaceURI, final String localName) { return (namespaceURI == null || namespaceURI.isEmpty() || Namespaces.XHTML_NS.equals(namespaceURI)) && inlineTags.contains(localName); } + + /** + * Write a meta content-type tag as the first child of head when + * include-content-type is enabled (the default per W3C Serialization 3.1). 
+ */ + protected void writeContentTypeMeta() throws TransformerException { + if (contentTypeMetaWritten || outputProperties == null) { + return; + } + final String includeContentType = outputProperties.getProperty("include-content-type", "yes"); + if (!"yes".equals(includeContentType)) { + return; + } + contentTypeMetaWritten = true; + try { + final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8"); + closeStartTag(false); + final Writer writer = getWriter(); + + // HTML5 method uses + // XHTML and HTML4 use + // XHTML mode requires self-closing tags (/>) for valid XML output — + // the URL rewrite pipeline re-parses this as XML in the view step. + final boolean selfClose = !isHtmlMethod(); + if (isHtmlMethod() && isHtml5Version()) { + writer.write("" : "\">"); + } else { + final String mediaType = outputProperties.getProperty(OutputKeys.MEDIA_TYPE, "text/html"); + writer.write("" : "\">"); + } + } catch (IOException e) { + throw new TransformerException(e.getMessage(), e); + } + } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java index 763aaf52ef6..48887f88e13 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java @@ -78,6 +78,11 @@ public class XMLWriter implements SerializerWriter { private String defaultNamespace = ""; + // Namespace stack (BaseX-style): flat list of (prefix, uri) pairs for all in-scope bindings. + // nstack records the list size at each startElement so endElement can roll back declarations. + private final List nspaces = new ArrayList<>(); + private final Deque nstack = new ArrayDeque<>(); + /** * When serializing an XDM this should be true, * otherwise false. @@ -86,8 +91,33 @@ public class XMLWriter implements SerializerWriter { * compared to retrieving resources from the database. 
*/ private boolean xdmSerialization = false; + private boolean xml11 = false; + private boolean canonical = false; + @Nullable private java.text.Normalizer.Form normalizationForm = null; + + // Canonical XML: buffer namespaces and attributes for sorting + private final List canonicalNamespaces = new ArrayList<>(); // [prefix, uri] + private final List canonicalAttributes = new ArrayList<>(); // [nsUri, localName, qname, value] private final Deque elementName = new ArrayDeque<>(); + + /** + * Returns true if cdata-section-elements should be applied. + * Subclasses (e.g., XHTMLWriter for HTML method) can override + * to suppress CDATA sections. + */ + protected boolean shouldUseCdataSections() { + return xdmSerialization; + } + + /** + * Returns the namespace URI of the current (innermost) element, + * or null if no element is on the stack. + */ + protected String currentElementNamespaceURI() { + final QName top = elementName.peek(); + return top != null ? top.getNamespaceURI() : null; + } private LazyVal> cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames); private boolean cdataSetionElement = false; @@ -96,8 +126,9 @@ public class XMLWriter implements SerializerWriter { Arrays.fill(textSpecialChars, false); textSpecialChars['<'] = true; textSpecialChars['>'] = true; - // textSpecialChars['\r'] = true; + textSpecialChars['\r'] = true; textSpecialChars['&'] = true; + textSpecialChars[0x7F] = true; // DEL must be escaped as  attrSpecialChars = new boolean[128]; Arrays.fill(attrSpecialChars, false); @@ -108,6 +139,7 @@ public class XMLWriter implements SerializerWriter { attrSpecialChars['\t'] = true; attrSpecialChars['&'] = true; attrSpecialChars['"'] = true; + attrSpecialChars[0x7F] = true; // DEL must be escaped as  } @Nullable private XMLDeclaration originalXmlDecl; @@ -139,6 +171,10 @@ public void setOutputProperties(final Properties properties) { } this.xdmSerialization = 
"yes".equals(outputProperties.getProperty(EXistOutputKeys.XDM_SERIALIZATION, "no")); + this.xml11 = "1.1".equals(outputProperties.getProperty(OutputKeys.VERSION)); + this.normalizationForm = parseNormalizationForm(outputProperties.getProperty("normalization-form", "none")); + final String canonicalProp = outputProperties.getProperty(EXistOutputKeys.CANONICAL); + this.canonical = "yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp); } private Set parseCdataSectionElementNames() { @@ -166,6 +202,8 @@ protected void resetObjectState() { originalXmlDecl = null; doctypeWritten = false; defaultNamespace = ""; + nspaces.clear(); + nstack.clear(); cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames); } @@ -184,12 +222,35 @@ public Writer getWriter() { } public String getDefaultNamespace() { - return defaultNamespace.isEmpty() ? null : defaultNamespace; + final String fromStack = nsLookup(""); + return (fromStack == null || fromStack.isEmpty()) ? null : fromStack; } public void setDefaultNamespace(final String namespace) { + // Keep the baseline field in sync; nsLookup() falls back to it when the + // namespace stack has no in-scope binding for the default prefix. defaultNamespace = namespace == null ? "" : namespace; } + + /** + * Looks up the currently in-scope URI for {@code prefix} by scanning the flat + * namespace list from innermost to outermost scope. + * For the default-namespace prefix ({@code ""}), falls back to the + * {@link #defaultNamespace} baseline field when the stack has no binding. + * + * @return the in-scope URI, or {@code null} if {@code prefix} is unbound + */ + private String nsLookup(final String prefix) { + for (int i = nspaces.size() - 2; i >= 0; i -= 2) { + if (nspaces.get(i).equals(prefix)) { + return nspaces.get(i + 1); + } + } + if (prefix.isEmpty()) { + return defaultNamespace.isEmpty() ? 
null : defaultNamespace; + } + return null; + } public void startDocument() throws TransformerException { resetObjectState(); @@ -207,15 +268,16 @@ public void startElement(final String namespaceUri, final String localName, fina if(!declarationWritten) { writeDeclaration(); } - + if(!doctypeWritten) { writeDoctype(qname); } - + try { if(tagIsOpen) { closeStartTag(false); } + nstack.push(nspaces.size()); writer.write('<'); writer.write(qname); tagIsOpen = true; @@ -233,21 +295,22 @@ public void startElement(final QName qname) throws TransformerException { if(!declarationWritten) { writeDeclaration(); } - + if(!doctypeWritten) { writeDoctype(qname.getStringValue()); } - + try { if(tagIsOpen) { closeStartTag(false); } + nstack.push(nspaces.size()); writer.write('<'); if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) { writer.write(qname.getPrefix()); writer.write(':'); } - + writer.write(qname.getLocalPart()); tagIsOpen = true; elementName.push(qname); @@ -266,6 +329,9 @@ public void endElement(final String namespaceURI, final String localName, final writer.write('>'); } elementName.pop(); + if (!nstack.isEmpty()) { + nspaces.subList(nstack.pop(), nspaces.size()).clear(); + } } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } @@ -285,40 +351,74 @@ public void endElement(final QName qname) throws TransformerException { writer.write('>'); } elementName.pop(); + if (!nstack.isEmpty()) { + nspaces.subList(nstack.pop(), nspaces.size()).clear(); + } } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } } public void namespace(final String prefix, final String nsURI) throws TransformerException { - if((nsURI == null) && (prefix == null || prefix.isEmpty())) { + final String normPrefix = prefix != null ? prefix : ""; + final String normUri = nsURI != null ? 
nsURI : ""; + + // The xml namespace is implicitly declared and never needs explicit serialization + if ("xml".equals(normPrefix)) { return; } - try { - if(!tagIsOpen) { + try { + if (!tagIsOpen) { + // An xmlns="" outside a start tag is harmless — just skip it + if (normUri.isEmpty() && normPrefix.isEmpty()) { + return; + } throw new TransformerException("Found a namespace declaration outside an element"); } - if(prefix != null && !prefix.isEmpty()) { - writer.write(' '); - writer.write("xmlns"); - writer.write(':'); - writer.write(prefix); - writer.write("=\""); - writeChars(nsURI, true); - writer.write('"'); - } else { - if(defaultNamespace.equals(nsURI)) { - return; + if (canonical) { + // Buffer for sorting — emitted in closeStartTag + // Validate: reject relative namespace URIs (SERE0024) + if (!normUri.isEmpty() && isRelativeUri(normUri)) { + throw new TransformerException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + normUri); } - writer.write(' '); - writer.write("xmlns"); + if (normPrefix.isEmpty() && normUri.isEmpty()) { + return; // Skip xmlns="" in canonical (not meaningful for no-namespace elements) + } + // Deduplicate: replace existing binding for same prefix + canonicalNamespaces.removeIf(ns -> ns[0].equals(normPrefix)); + canonicalNamespaces.add(new String[]{normPrefix, normUri}); + // Track in namespace stack so getDefaultNamespace() stays accurate + nspaces.add(normPrefix); + nspaces.add(normUri); + return; + } + + // Look up what is currently in scope for this prefix. + // nsLookup scans nspaces from innermost to outermost and falls back to the + // defaultNamespace baseline field for the default-namespace prefix. + final String inScope = nsLookup(normPrefix); + final String effective = inScope != null ? 
inScope : ""; + if (normUri.equals(effective)) { + return; // Binding unchanged — no declaration needed + } + + // Record the new binding so descendants can see it via nsLookup + nspaces.add(normPrefix); + nspaces.add(normUri); + + // Write the namespace declaration + writer.write(' '); + if (normPrefix.isEmpty()) { + writer.write("xmlns=\""); + } else { + writer.write("xmlns:"); + writer.write(normPrefix); writer.write("=\""); - writeChars(nsURI, true); - writer.write('"'); - defaultNamespace= nsURI; } + writeChars(normUri, true); + writer.write('"'); } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } @@ -329,8 +429,13 @@ public void attribute(String qname, CharSequence value) throws TransformerExcept if(!tagIsOpen) { characters(value); return; - // throw new TransformerException("Found an attribute outside an - // element"); + } + if (canonical) { + // Buffer for sorting — extract namespace URI from qname if prefixed + final int colon = qname.indexOf(':'); + final String nsUri = colon > 0 ? "" : ""; // string qname doesn't carry namespace + canonicalAttributes.add(new String[]{nsUri, colon > 0 ? qname.substring(colon + 1) : qname, qname, value.toString()}); + return; } writer.write(' '); writer.write(qname); @@ -347,8 +452,18 @@ public void attribute(final QName qname, final CharSequence value) throws Transf if(!tagIsOpen) { characters(value); return; - // throw new TransformerException("Found an attribute outside an - // element"); + } + if (canonical) { + final String nsUri = qname.getNamespaceURI() != null ? 
qname.getNamespaceURI() : ""; + final String localName = qname.getLocalPart(); + final String fullName; + if (qname.getPrefix() != null && !qname.getPrefix().isEmpty()) { + fullName = qname.getPrefix() + ":" + localName; + } else { + fullName = localName; + } + canonicalAttributes.add(new String[]{nsUri, localName, fullName, value.toString()}); + return; } writer.write(' '); if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) { @@ -373,12 +488,68 @@ public void characters(final CharSequence chars) throws TransformerException { if(tagIsOpen) { closeStartTag(false); } - writeChars(chars, false); + // When xdmSerialization is active and current element is in cdata-section-elements, + // wrap text content in CDATA instead of escaping it (per W3C Serialization 3.1) + if (shouldUseCdataSections() && !elementName.isEmpty() + && cdataSectionElements.get().contains(elementName.peek())) { + writeCdataContent(chars); + } else { + writeChars(chars, false); + } } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } } + private void writeCdataContent(final CharSequence chars) throws IOException { + // CDATA sections must be split when: + // 1. The content contains "]]>" (which would end the CDATA prematurely) + // 2. 
A character cannot be represented in the output encoding (must be escaped as &#xNN;) + final String s = normalize(chars).toString(); + boolean inCdata = false; + for (int i = 0; i < s.length(); ) { + final int cp = s.codePointAt(i); + final int cpLen = Character.charCount(cp); + + // Check for "]]>" sequence + if (cp == ']' && i + 2 < s.length() && s.charAt(i + 1) == ']' && s.charAt(i + 2) == '>') { + if (!inCdata) { + writer.write(""); + inCdata = false; + i += 2; // skip "]]", the ">" will be picked up next + continue; + } + + // Check if character is encodable in the output charset + if (!charSet.inCharacterSet((char) cp)) { + // Close any open CDATA section + if (inCdata) { + writer.write("]]>"); + inCdata = false; + } + // Write as character reference + writer.write("&#x"); + writer.write(Integer.toHexString(cp)); + writer.write(';'); + } else { + // Encodable character — write inside CDATA + if (!inCdata) { + writer.write(""); + } + } + public void characters(final char[] ch, final int start, final int len) throws TransformerException { if(!declarationWritten) { writeDeclaration(); @@ -510,8 +681,23 @@ public void documentType(final String name, final String publicId, final String protected void closeStartTag(final boolean isEmpty) throws TransformerException { try { if(tagIsOpen) { - if(isEmpty) { + if (canonical) { + flushCanonicalBuffers(); + } + if(isEmpty && !canonical) { + // Canonical XML: empty elements expanded to writer.write("/>"); + } else if (isEmpty) { + // Canonical: write > for empty elements + writer.write('>'); + final QName currentElem = elementName.peek(); + writer.write("'); } else { writer.write('>'); } @@ -522,6 +708,52 @@ protected void closeStartTag(final boolean isEmpty) throws TransformerException } } + protected boolean isCanonical() { + return canonical; + } + + protected void flushCanonicalBuffersXhtml() throws TransformerException { + try { + flushCanonicalBuffers(); + } catch (final IOException ioe) { + throw new 
TransformerException(ioe.getMessage(), ioe); + } + } + + private void flushCanonicalBuffers() throws IOException { + // Sort namespaces by prefix (default namespace first, then alphabetical) + canonicalNamespaces.sort((a, b) -> a[0].compareTo(b[0])); + // Write sorted namespaces + for (final String[] ns : canonicalNamespaces) { + writer.write(' '); + if (ns[0].isEmpty()) { + writer.write("xmlns=\""); + } else { + writer.write("xmlns:"); + writer.write(ns[0]); + writer.write("=\""); + } + writeChars(ns[1], true); + writer.write('"'); + } + canonicalNamespaces.clear(); + + // Sort attributes by namespace URI (primary), then local name (secondary) + canonicalAttributes.sort((a, b) -> { + final int cmp = a[0].compareTo(b[0]); + return cmp != 0 ? cmp : a[1].compareTo(b[1]); + }); + // Write sorted attributes + for (final String[] attr : canonicalAttributes) { + writer.write(' '); + writer.write(attr[2]); // qualified name + writer.write("=\""); + writeChars(attr[3], true); + writer.write('"'); + } + canonicalAttributes.clear(); + } + protected void writeDeclaration() throws TransformerException { if(declarationWritten) { return; @@ -537,7 +769,9 @@ protected void writeDeclaration() throws TransformerException { // get the fields of the persisted xml declaration, but overridden with any properties from the serialization properties final String version = outputProperties.getProperty(OutputKeys.VERSION, (originalXmlDecl.version != null ? originalXmlDecl.version : DEFAULT_XML_VERSION)); final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, (originalXmlDecl.encoding != null ? 
originalXmlDecl.encoding : DEFAULT_XML_ENCODING)); - @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone); + @Nullable final String standaloneOrig = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone); + // "omit" means standalone should be absent from the declaration + @Nullable final String standalone = (standaloneOrig != null && "omit".equalsIgnoreCase(standaloneOrig.trim())) ? null : standaloneOrig; writeDeclaration(version, encoding, standalone); @@ -545,11 +779,15 @@ protected void writeDeclaration() throws TransformerException { } final String omitXmlDecl = outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); - if ("no".equals(omitXmlDecl)) { + @Nullable final String standaloneRaw = outputProperties.getProperty(OutputKeys.STANDALONE); + // "omit" means standalone should be absent from the declaration + @Nullable final String standalone = (standaloneRaw != null && "omit".equalsIgnoreCase(standaloneRaw.trim())) ? 
null : standaloneRaw; + // Per W3C Serialization 3.1: output declaration if omit-xml-declaration is false/no/0, + // or if standalone is explicitly set (the declaration is required to carry standalone) + if (isBooleanFalse(omitXmlDecl) || standalone != null) { // get the fields of the declaration from the serialization properties final String version = outputProperties.getProperty(OutputKeys.VERSION, DEFAULT_XML_VERSION); final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, DEFAULT_XML_ENCODING); - @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE); writeDeclaration(version, encoding, standalone); } @@ -564,7 +802,15 @@ private void writeDeclaration(final String version, final String encoding, @Null writer.write('"'); if(standalone != null) { writer.write(" standalone=\""); - writer.write(standalone); + // Normalize boolean values to yes/no for XML declaration + final String standaloneVal = standalone.trim(); + if ("true".equals(standaloneVal) || "1".equals(standaloneVal)) { + writer.write("yes"); + } else if ("false".equals(standaloneVal) || "0".equals(standaloneVal)) { + writer.write("no"); + } else { + writer.write(standaloneVal); + } writer.write('"'); } writer.write("?>\n"); @@ -589,36 +835,79 @@ protected void writeDoctype(final String rootElement) throws TransformerExceptio protected boolean needsEscape(final char ch) { return true; } + + /** + * Whether & before { should be escaped. HTML output returns false + * per W3C HTML serialization spec. XML output returns true (always escape &). + */ + protected boolean escapeAmpersandBeforeBrace() { + return true; + } + + /** + * Check if a serialization boolean parameter value is false. + * W3C Serialization 3.1 accepts "no", "false", "0" (with optional whitespace) as false. 
+ */ + protected static boolean isBooleanFalse(final String value) { + if (value == null) { + return false; + } + final String trimmed = value.trim(); + return "no".equals(trimmed) || "false".equals(trimmed) || "0".equals(trimmed); + } + + /** + * Whether the given character needs escaping. Subclasses can override + * to suppress escaping for specific contexts (e.g., HTML raw text elements). + * + * @param ch the character to check + * @param inAttribute true if we're writing an attribute value + */ + protected boolean needsEscape(final char ch, final boolean inAttribute) { + return needsEscape(ch); + } protected void writeChars(final CharSequence s, final boolean inAttribute) throws IOException { + // Apply Unicode normalization if configured + final CharSequence text = normalize(s); final boolean[] specialChars = inAttribute ? attrSpecialChars : textSpecialChars; char ch = 0; - final int len = s.length(); + final int len = text.length(); int pos = 0, i; while(pos < len) { i = pos; while(i < len) { - ch = s.charAt(i); + ch = text.charAt(i); if(ch < 128) { if(specialChars[ch]) { break; + } else if(xml11 && ch >= 0x01 && ch <= 0x1F + && ch != 0x09 && ch != 0x0A && ch != 0x0D) { + // XML 1.1: C0 control chars (except TAB, LF, CR) must be escaped + break; } else { i++; } } else if(!charSet.inCharacterSet(ch)) { break; + } else if(ch >= 0x7F && ch <= 0x9F) { + // Control chars 0x7F-0x9F must be serialized as character references + break; + } else if(ch == 0x2028) { + // LINE SEPARATOR must be serialized as character reference + break; } else { i++; } } - writeCharSeq(s, pos, i); + writeCharSeq(text, pos, i); // writer.write(s.subSequence(pos, i).toString()); if (i >= len) { return; } - if(needsEscape(ch)) { + if(needsEscape(ch, inAttribute)) { switch(ch) { case '<': writer.write("<"); @@ -627,7 +916,12 @@ protected void writeChars(final CharSequence s, final boolean inAttribute) throw writer.write(">"); break; case '&': - writer.write("&"); + // HTML spec: & before { 
in attribute values should not be escaped + if (inAttribute && i + 1 < len && text.charAt(i + 1) == '{' && !escapeAmpersandBeforeBrace()) { + writer.write('&'); + } else { + writer.write("&"); + } break; case '\r': writer.write(" "); @@ -672,6 +966,38 @@ protected void writeCharacterReference(final char charval) throws IOException { writer.write(charref, 0, o); } + @Nullable + private static java.text.Normalizer.Form parseNormalizationForm(final String value) { + if (value == null) return null; + return switch (value.trim().toUpperCase(java.util.Locale.ROOT)) { + case "NFC" -> java.text.Normalizer.Form.NFC; + case "NFD" -> java.text.Normalizer.Form.NFD; + case "NFKC" -> java.text.Normalizer.Form.NFKC; + case "NFKD" -> java.text.Normalizer.Form.NFKD; + case "NONE", "" -> null; + default -> null; // "fully-normalized" or unknown — treated as none + }; + } + + /** + * Apply Unicode normalization if a normalization-form is set. + */ + protected CharSequence normalize(final CharSequence text) { + if (normalizationForm == null) return text; + final String s = text.toString(); + if (java.text.Normalizer.isNormalized(s, normalizationForm)) return text; + return java.text.Normalizer.normalize(s, normalizationForm); + } + + private static boolean isRelativeUri(final String uri) { + for (int i = 0; i < uri.length(); i++) { + final char c = uri.charAt(i); + if (c == ':') return false; + if (c == '/' || c == '?' 
|| c == '#') return true; + } + return true; + } + private static class XMLDeclaration { @Nullable final String version; @Nullable final String encoding; diff --git a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java index 366e3866cbc..44266ea5869 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java @@ -32,6 +32,7 @@ import org.xml.sax.SAXNotSupportedException; import javax.xml.transform.OutputKeys; +import java.io.IOException; import java.io.Writer; import java.util.Properties; @@ -72,17 +73,167 @@ public void serialize(final Sequence sequence, final int start, final int howman break; case "xml": default: - serializeXML(sequence, start, howmany, wrap, typed, compilationTime, executionTime); + // For XML/text methods, flatten any arrays in the sequence before serialization + // (arrays can't be serialized as SAX events directly) + // Maps and function items cannot be serialized with XML/text methods (SENR0001) + validateXmlSerializable(sequence); + if (isCanonical()) { + validateCanonical(sequence); + } + final Sequence flattened = flattenArrays(sequence); + if (flattened != sequence) { + // Flattening changed the sequence — reset start/howmany to cover all items. + // For text method, default item-separator is space if not explicitly set. + if ("text".equals(method) && outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR) == null) { + outputProperties.setProperty(EXistOutputKeys.ITEM_SEPARATOR, " "); + } + serializeXML(flattened, 1, flattened.getItemCount(), wrap, typed, compilationTime, executionTime); + } else { + serializeXML(flattened, start, howmany, wrap, typed, compilationTime, executionTime); + } break; } } + /** + * Validate that a sequence can be serialized with the XML/text method. 
+ * Maps and function items are not serializable as XML (SENR0001). + */ + private static void validateXmlSerializable(final Sequence sequence) throws SAXException, XPathException { + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final int type = item.getType(); + if (type == Type.MAP_ITEM || type == Type.FUNCTION) { + throw new SAXException("err:SENR0001 Cannot serialize a " + + Type.getTypeName(type) + " with the XML or text output method"); + } + } + } + + private boolean isCanonical() { + final String v = outputProperties.getProperty(EXistOutputKeys.CANONICAL); + return "yes".equals(v) || "true".equals(v) || "1".equals(v); + } + + /** + * Validate canonical XML constraints (SERE0024). + * Checks for relative namespace URIs and multi-root documents. + */ + private void validateCanonical(final Sequence sequence) throws SAXException, XPathException { + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (Type.subTypeOf(item.getType(), Type.NODE)) { + validateCanonicalNode((NodeValue) item); + } + } + } + + private void validateCanonicalNode(final NodeValue node) throws SAXException, XPathException { + if (node.getType() == Type.DOCUMENT) { + // Check for multi-root: document must have exactly one element child + int elementCount = 0; + final org.w3c.dom.Node domNode = node.getNode(); + for (org.w3c.dom.Node child = domNode.getFirstChild(); child != null; child = child.getNextSibling()) { + if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + elementCount++; + } + } + if (elementCount != 1) { + throw new SAXException("err:SERE0024 Canonical serialization requires a well-formed document with exactly one root element, found " + elementCount); + } + // Check namespace URIs on the document's elements + validateCanonicalNamespaces(domNode); + } else if (node.getType() == Type.ELEMENT) { + validateCanonicalNamespaces(node.getNode()); + } + } + + 
private void validateCanonicalNamespaces(final org.w3c.dom.Node node) throws SAXException { + if (node.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + final String nsUri = node.getNamespaceURI(); + if (nsUri != null && !nsUri.isEmpty() && isRelativeUri(nsUri)) { + throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + nsUri); + } + // Also check namespace URIs in attributes (including xmlns declarations) + final org.w3c.dom.NamedNodeMap attrs = node.getAttributes(); + if (attrs != null) { + for (int i = 0; i < attrs.getLength(); i++) { + final org.w3c.dom.Attr attr = (org.w3c.dom.Attr) attrs.item(i); + final String attrName = attr.getName(); + // Check xmlns and xmlns:prefix declarations + if ("xmlns".equals(attrName) || attrName.startsWith("xmlns:")) { + final String declUri = attr.getValue(); + if (declUri != null && !declUri.isEmpty() && isRelativeUri(declUri)) { + throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + declUri); + } + } + } + } + // Check child elements recursively + for (org.w3c.dom.Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) { + validateCanonicalNamespaces(child); + } + } + } + + private static boolean isRelativeUri(final String uri) { + // Absolute URIs contain a scheme (e.g., "http://", "urn:", "file:") + // A URI without ":" before the first "/" or "?" is relative + for (int i = 0; i < uri.length(); i++) { + final char c = uri.charAt(i); + if (c == ':') return false; // Found scheme separator — absolute + if (c == '/' || c == '?' || c == '#') return true; // Path/query before scheme — relative + } + return true; // No scheme found — relative (e.g., "local.ns") + } + + /** + * Flatten arrays in a sequence — each array member becomes a top-level item. + * This is needed because the SAX-based XML/text serializer can't handle ArrayType items. 
+ */ + private static Sequence flattenArrays(final Sequence sequence) throws XPathException { + boolean hasArrays = false; + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (i.nextItem().getType() == Type.ARRAY_ITEM) { + hasArrays = true; + break; + } + } + if (!hasArrays) { + return sequence; + } + final ValueSequence result = new ValueSequence(); + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (item.getType() == Type.ARRAY_ITEM) { + final Sequence flat = org.exist.xquery.functions.array.ArrayType.flatten(item); + for (final SequenceIterator fi = flat.iterate(); fi.hasNext(); ) { + result.add(fi.nextItem()); + } + } else { + result.add(item); + } + } + return result; + } + public boolean normalize() { final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml"); return !("json".equals(method) || "adaptive".equals(method)); } private void serializeXML(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException { + final String itemSeparator = outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR); + // If item-separator is set and sequence has multiple items, serialize items individually + // with separator between them (the internal Serializer doesn't handle item-separator) + if (itemSeparator != null && sequence.getItemCount() > 1 && !wrap) { + serializeXMLWithItemSeparator(sequence, start, howmany, typed, itemSeparator); + } else { + serializeXMLDirect(sequence, start, howmany, wrap, typed, compilationTime, executionTime); + } + } + + private void serializeXMLDirect(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException { final Serializer serializer = broker.borrowSerializer(); 
SAXSerializer sax = null; try { @@ -102,11 +253,78 @@ private void serializeXML(final Sequence sequence, final int start, final int ho } } + private void serializeXMLWithItemSeparator(final Sequence sequence, final int start, final int howmany, final boolean typed, final String itemSeparator) throws SAXException, XPathException { + // Write XML declaration if not omitted (per W3C Serialization 3.1) + if (!isBooleanTrue(outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "no"))) { + try { + final String version = outputProperties.getProperty(OutputKeys.VERSION, "1.0"); + final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8"); + writer.write(""); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + + final int actualStart = start - 1; // convert 1-based to 0-based + final int end = Math.min(actualStart + howmany, sequence.getItemCount()); + for (int i = actualStart; i < end; i++) { + if (i > actualStart) { + try { + writer.write(itemSeparator); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + final Item item = sequence.itemAt(i); + if (item == null) { + continue; + } + if (Type.subTypeOf(item.getType(), Type.NODE)) { + // For nodes serialized with item-separator, omit the XML declaration + // on each individual node (only one declaration for the whole output) + final Properties nodeProps = new Properties(outputProperties); + nodeProps.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + final Serializer serializer = broker.borrowSerializer(); + SAXSerializer sax = null; + try { + sax = (SAXSerializer) SerializerPool.getInstance().borrowObject(SAXSerializer.class); + sax.setOutput(writer, nodeProps); + serializer.setProperties(nodeProps); + serializer.setSAXHandlers(sax, sax); + final ValueSequence singleItem = new ValueSequence(1); + singleItem.add(item); + serializer.toSAX(singleItem, 1, 1, false, typed, 0, 0); + } catch (SAXNotSupportedException | 
SAXNotRecognizedException e) { + throw new SAXException(e.getMessage(), e); + } finally { + if (sax != null) { + SerializerPool.getInstance().returnObject(sax); + } + broker.returnSerializer(serializer); + } + } else { + try { + writer.write(item.getStringValue()); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + } + } + + private static boolean isBooleanTrue(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "yes".equals(v) || "true".equals(v) || "1".equals(v); + } + private void serializeJSON(final Sequence sequence, final long compilationTime, final long executionTime) throws SAXException, XPathException { - // backwards compatibility: if the sequence contains a single element, we assume - // it should be transformed to JSON following the rules of the old JSON writer + // Backwards compatibility: if the sequence contains a single element or document, + // use the legacy XML-to-JSON writer (which converts XML structure to JSON properties). + // This is needed for RESTXQ and REST API which return XML documents with method=json. + // Maps, arrays, atomics, and multi-item sequences go through the W3C-compliant JSONSerializer. 
if (sequence.hasOne() && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) { - serializeXML(sequence, 1, 1, false, false, compilationTime, executionTime); + serializeXMLDirect(sequence, 1, 1, false, false, compilationTime, executionTime); } else { JSONSerializer serializer = new JSONSerializer(broker, outputProperties); serializer.serialize(sequence, writer); diff --git a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java index bd1f01a9454..9c533df3c44 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java @@ -23,53 +23,93 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.json.JsonWriteFeature; import io.lacuna.bifurcan.IEntry; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import org.exist.storage.DBBroker; import org.exist.storage.serializers.EXistOutputKeys; import org.exist.storage.serializers.Serializer; +import org.exist.xquery.ErrorCodes; import org.exist.xquery.XPathException; import org.exist.xquery.functions.array.ArrayType; import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.util.SerializerUtils; import org.exist.xquery.value.*; import org.xml.sax.SAXException; +import javax.annotation.Nullable; import javax.xml.transform.OutputKeys; import java.io.IOException; import java.io.Writer; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; import java.util.Properties; +import java.util.Set; /** * Called by {@link org.exist.util.serializer.XQuerySerializer} to serialize an XQuery sequence * to JSON. The JSON serializer differs from other serialization methods because it maps XQuery * data items to JSON. 
/**
 * Creates a JSON serializer bound to a broker and a set of output properties.
 *
 * <p>The boolean parameters {@code canonical} and {@code allow-duplicate-names}
 * are both interpreted with {@code isBooleanTrue}, so "yes", "true" and "1"
 * (with optional whitespace) all count as enabled. Previously
 * {@code allow-duplicate-names} only recognised the literal "yes", so a value
 * of "true" silently switched duplicate detection on.</p>
 *
 * @param broker           the broker used to borrow XML serializers for node items
 * @param outputProperties the serialization parameters; also the source of the character map
 */
public JSONSerializer(DBBroker broker, Properties outputProperties) {
    super();
    this.broker = broker;
    this.outputProperties = outputProperties;
    this.canonical = isBooleanTrue(outputProperties.getProperty(EXistOutputKeys.CANONICAL));
    // Canonical mode always rejects duplicate keys, whatever allow-duplicate-names says.
    this.allowDuplicateNames = !canonical && isBooleanTrue(
            outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"));
    this.characterMap = SerializerUtils.getCharacterMap(outputProperties);
}
com.fasterxml.jackson.core.util.DefaultPrettyPrinter(); + pp.indentArraysWith( + com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent( + " ".repeat(indentSpaces))); + pp.indentObjectsWith( + com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent( + " ".repeat(indentSpaces))); + generator.setPrettyPrinter(pp); } - if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"))) { - generator.enable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + // Duplicate detection is handled manually in serializeMap for proper SERE0022 errors + generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + final boolean jsonLines = isBooleanTrue( + outputProperties.getProperty(EXistOutputKeys.JSON_LINES, "no")); + if (jsonLines) { + serializeJsonLines(sequence, generator); } else { - generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + serializeSequence(sequence, generator); } - serializeSequence(sequence, generator); if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.INSERT_FINAL_NEWLINE, "no"))) { generator.writeRaw('\n'); } @@ -79,12 +119,55 @@ public void serialize(Sequence sequence, Writer writer) throws SAXException { } } + /** + * JSON Lines format (NDJSON): one JSON value per line, no array wrapper. + * Per QT4 Serialization 4.0, when json-lines=true. + */ + private void serializeJsonLines(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException { + if (sequence.isEmpty()) { + return; + } + // Each line must be a separate root-level value. Jackson adds separator + // whitespace between root values, so we serialize each item to a string + // and concatenate with newlines. 
+ final boolean escapeSolidus = !isBooleanFalse( + outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "yes")); + boolean first = true; + for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (!first) { + generator.writeRaw('\n'); + } + // Serialize this item to a standalone string + final java.io.StringWriter lineWriter = new java.io.StringWriter(); + final JsonFactory lineFactory = JsonFactory.builder() + .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus) + .build(); + final JsonGenerator lineGen = lineFactory.createGenerator(lineWriter); + lineGen.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); + serializeItem(i.nextItem(), lineGen); + lineGen.close(); + // Write the line's JSON as raw content to avoid Jackson's root separator + generator.writeRaw(lineWriter.toString()); + first = false; + } + } + private void serializeSequence(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException { + serializeSequence(sequence, generator, false); + } + + private void serializeSequence(Sequence sequence, JsonGenerator generator, boolean allowMultiItem) throws IOException, XPathException, SAXException { if (sequence.isEmpty()) { generator.writeNull(); } else if (sequence.hasOne() && "no".equals(outputProperties.getProperty(EXistOutputKeys.JSON_ARRAY_OUTPUT, "no"))) { serializeItem(sequence.itemAt(0), generator); + } else if (!allowMultiItem) { + // SERE0023: JSON output method cannot serialize a sequence of more than one item + // at the top level or as a map entry value + throw new SAXException("err:SERE0023 Sequence of " + sequence.getItemCount() + + " items cannot be serialized using the JSON output method"); } else { + // Inside arrays, multi-item sequences become JSON arrays generator.writeStartArray(); for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) { serializeItem(i.nextItem(), generator); @@ -99,23 +182,111 @@ private void serializeItem(Item item, JsonGenerator generator) throws 
IOExceptio } else if (item.getType() == Type.MAP_ITEM) { serializeMap((MapType) item, generator); } else if (Type.subTypeOf(item.getType(), Type.ANY_ATOMIC_TYPE)) { - if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) { - generator.writeNumber(item.getStringValue()); - } else { - switch (item.getType()) { - case Type.BOOLEAN: - generator.writeBoolean(((AtomicValue)item).effectiveBooleanValue()); - break; - default: - generator.writeString(item.getStringValue()); - break; - } - } + serializeAtomicValue(item, generator); } else if (Type.subTypeOf(item.getType(), Type.NODE)) { serializeNode(item, generator); + } else if (Type.subTypeOf(item.getType(), Type.FUNCTION)) { + throw new SAXException("err:SERE0021 Sequence contains a function item, which cannot be serialized as JSON"); } } + private void serializeAtomicValue(Item item, JsonGenerator generator) throws IOException, XPathException, SAXException { + if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) { + if (canonical) { + // RFC 8785: cast to double, use shortest representation + final double d = ((NumericValue) item).getDouble(); + if (!Double.isFinite(d)) { + throw new SAXException("err:SERE0020 Numeric value " + item.getStringValue() + + " cannot be serialized in canonical JSON"); + } + generator.writeRawValue(canonicalDoubleString(d)); + return; + } + final String stringValue = item.getStringValue(); + // W3C Serialization 3.1: INF, -INF, and NaN MUST raise SERE0020 + if ("NaN".equals(stringValue) || "INF".equals(stringValue) || "-INF".equals(stringValue)) { + throw new SAXException("err:SERE0020 Numeric value " + stringValue + + " cannot be serialized as JSON"); + } else if ("-0".equals(stringValue)) { + // Negative zero: write as 0 (QT4 allows either 0 or -0) + generator.writeNumber(stringValue); + } else { + generator.writeNumber(stringValue); + } + } else if (item.getType() == Type.BOOLEAN) { + generator.writeBoolean(((AtomicValue) item).effectiveBooleanValue()); + } else { + 
writeStringWithCharMap(generator, item.getStringValue()); + } + } + + /** + * RFC 8785 canonical double formatting. + * Uses ECMAScript shortest representation: minimum digits to uniquely + * identify the double value. Plain notation for [1e-6, 1e21), exponential + * notation otherwise with lowercase 'e'. + */ + private static String canonicalDoubleString(final double value) { + if (value == 0) return "0"; + if (value == Double.MIN_VALUE) return "5e-324"; + if (value == -Double.MIN_VALUE) return "-5e-324"; + + final java.math.BigDecimal bd = java.math.BigDecimal.valueOf(value).stripTrailingZeros(); + final double abs = Math.abs(value); + if (abs >= 1e-6 && abs < 1e21) { + return bd.toPlainString(); + } else { + return bd.toString().replace('E', 'e'); + } + } + + /** + * Apply use-character-maps substitutions to a string value. + * Character map replacements are written raw (not escaped by JSON). + */ + private String applyCharacterMap(final String value) { + if (characterMap == null || characterMap.isEmpty()) { + return value; + } + final StringBuilder sb = new StringBuilder(value.length()); + for (int i = 0; i < value.length(); ) { + final int cp = value.codePointAt(i); + i += Character.charCount(cp); + final String replacement = characterMap.get(cp); + if (replacement != null) { + sb.append(replacement); + } else { + sb.appendCodePoint(cp); + } + } + return sb.toString(); + } + + /** + * Write a string value to the JSON generator, applying character map + * substitutions. The mapped string is passed through writeString so + * Jackson handles JSON structural separators and escaping correctly. 
+ */ + private void writeStringWithCharMap(final JsonGenerator generator, final String value) throws IOException { + if (characterMap == null || characterMap.isEmpty()) { + generator.writeString(value); + } else { + generator.writeString(applyCharacterMap(value)); + } + } + + private static boolean isBooleanTrue(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "yes".equals(v) || "true".equals(v) || "1".equals(v); + } + + private static boolean isBooleanFalse(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "no".equals(v) || "false".equals(v) || "0".equals(v); + } + private void serializeNode(Item item, JsonGenerator generator) throws SAXException { final Serializer serializer = broker.borrowSerializer(); final Properties xmlOutput = new Properties(); @@ -124,7 +295,7 @@ private void serializeNode(Item item, JsonGenerator generator) throws SAXExcepti xmlOutput.setProperty(OutputKeys.INDENT, outputProperties.getProperty(OutputKeys.INDENT, "no")); try { serializer.setProperties(xmlOutput); - generator.writeString(serializer.serialize((NodeValue)item)); + writeStringWithCharMap(generator, serializer.serialize((NodeValue)item)); } catch (IOException e) { throw new SAXException(e.getMessage(), e); } finally { @@ -136,16 +307,50 @@ private void serializeArray(ArrayType array, JsonGenerator generator) throws IOE generator.writeStartArray(); for (int i = 0; i < array.getSize(); i++) { final Sequence member = array.get(i); - serializeSequence(member, generator); + // W3C Serialization 3.1: multi-item sequences within arrays raise SERE0023 + if (member.getItemCount() > 1) { + throw new SAXException("err:SERE0023 Array member at position " + (i + 1) + + " is a sequence of " + member.getItemCount() + " items"); + } + serializeSequence(member, generator, false); } generator.writeEndArray(); } private void serializeMap(MapType map, JsonGenerator generator) throws IOException, 
XPathException, SAXException { generator.writeStartObject(); - for (final IEntry entry: map) { - generator.writeFieldName(entry.key().getStringValue()); - serializeSequence(entry.value(), generator); + final Set seenKeys = allowDuplicateNames ? null : new HashSet<>(); + + // Canonical JSON (RFC 8785): sort keys by UTF-16 code unit order + final Iterable> entries; + if (canonical) { + final List> sorted = new ArrayList<>(); + for (final IEntry entry : map) { + sorted.add(entry); + } + sorted.sort((a, b) -> { + try { + return a.key().getStringValue().compareTo(b.key().getStringValue()); + } catch (XPathException e) { + return 0; + } + }); + entries = sorted; + } else { + final List> list = new ArrayList<>(); + for (final IEntry entry : map) { + list.add(entry); + } + entries = list; + } + + for (final IEntry entry : entries) { + final String key = entry.key().getStringValue(); + if (seenKeys != null && !seenKeys.add(key)) { + throw new SAXException("err:SERE0022 Duplicate key '" + key + "' in map and allow-duplicate-names is 'no'"); + } + generator.writeFieldName(key); + serializeSequence(entry.value(), generator, false); } generator.writeEndObject(); } diff --git a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java index 20b94537797..82dc28ac3a3 100644 --- a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java @@ -124,9 +124,9 @@ public void addNamespaceDecl(final String name, final String uri) throws XPathEx throw new XPathException(this, ErrorCodes.XQST0070, "'" + Namespaces.XMLNS_NS + "' can bind only to '" + XMLConstants.XMLNS_ATTRIBUTE + "' prefix"); } - if (name != null && (!name.isEmpty()) && uri.trim().isEmpty()) { - throw new XPathException(this, ErrorCodes.XQST0085, "cannot undeclare a prefix " + name + "."); - } + // XQST0085: namespace undeclaration (xmlns:prefix="") is allowed when the + 
// implementation supports XML Names 1.1. Since eXist supports XML 1.1 + // serialization (version="1.1"), this is no longer an error. addNamespaceDecl(qn); } diff --git a/exist-core/src/main/java/org/exist/xquery/Option.java b/exist-core/src/main/java/org/exist/xquery/Option.java index 27f8615dfdb..32c38e67dd7 100644 --- a/exist-core/src/main/java/org/exist/xquery/Option.java +++ b/exist-core/src/main/java/org/exist/xquery/Option.java @@ -60,7 +60,9 @@ public Option(QName qname, String contents) throws XPathException { } public Option(final Expression expression, QName qname, String contents) throws XPathException { - if (qname.getPrefix() == null || qname.getPrefix().isEmpty()) + // Options must be in a namespace: either via prefix or via URIQualifiedName Q{uri}local + if ((qname.getPrefix() == null || qname.getPrefix().isEmpty()) + && (qname.getNamespaceURI() == null || qname.getNamespaceURI().isEmpty())) {throw new XPathException(expression, "XPST0081: options must have a prefix");} this.qname = qname; this.contents = contents; diff --git a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java index 6e8105ec786..13b9a8281c0 100644 --- a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java +++ b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java @@ -3276,9 +3276,16 @@ protected void clearUpdateListeners() { @Override public void checkOptions(final Properties properties) throws XPathException { checkLegacyOptions(properties); + + // Phase 1: Process parameter-document first (provides base settings) + processParameterDocument(dynamicOptions, properties); + processParameterDocument(staticOptions, properties); + + // Phase 2: Process inline options (override parameter-document settings) if (dynamicOptions != null) { for (final Option option : dynamicOptions) { - if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())) { + if 
(Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI()) + && !"parameter-document".equals(option.getQName().getLocalPart())) { SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties, inScopeNamespaces::get); } @@ -3288,6 +3295,7 @@ public void checkOptions(final Properties properties) throws XPathException { if (staticOptions != null) { for (final Option option : staticOptions) { if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI()) + && !"parameter-document".equals(option.getQName().getLocalPart()) && !properties.containsKey(option.getQName().getLocalPart())) { SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties, inScopeNamespaces::get); @@ -3296,6 +3304,55 @@ public void checkOptions(final Properties properties) throws XPathException { } } + /** + * Process the parameter-document serialization option if present. + * Loads the referenced XML file and extracts serialization parameters. + */ + private void processParameterDocument(final java.util.List