diff --git a/exist-core/pom.xml b/exist-core/pom.xml
index 991c80178de..7c739d20914 100644
--- a/exist-core/pom.xml
+++ b/exist-core/pom.xml
@@ -1200,6 +1200,7 @@ The BaseX Team. The original license statement is also included below.]]>${project.build.testOutputDirectory}/log4j2.xml
+ 180
+
+
+ org.exist.storage.lock.DeadlockIT
+ org.exist.xmldb.RemoveCollectionIT
+ @{jacocoArgLine} --add-modules jdk.incubator.vector --enable-native-access=ALL-UNNAMED -Dfile.encoding=${project.build.sourceEncoding} -Dexist.recovery.progressbar.hide=true${project.basedir}/../exist-jetty-config/target/classes/org/exist/jetty
diff --git a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java
index ca85a06f5fe..7c727e6ab16 100644
--- a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java
+++ b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java
@@ -28,6 +28,11 @@ public class EXistOutputKeys {
*/
public static final String ITEM_SEPARATOR = "item-separator";
+ // --- QT4 Serialization 4.0 parameters ---
+ public static final String CANONICAL = "canonical";
+ public static final String ESCAPE_SOLIDUS = "escape-solidus";
+ public static final String JSON_LINES = "json-lines";
+
public static final String OMIT_ORIGINAL_XML_DECLARATION = "omit-original-xml-declaration";
public static final String OUTPUT_DOCTYPE = "output-doctype";
diff --git a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
index 758ccee130a..a1b7c9890b3 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
@@ -81,13 +81,27 @@ protected SerializerWriter getDefaultWriter() {
public void setOutput(Writer writer, Properties properties) {
outputProperties = Objects.requireNonNullElseGet(properties, () -> new Properties(defaultProperties));
final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml");
- final String htmlVersionProp = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION, "1.0");
-
+ // For html/xhtml methods, determine HTML version:
+ // 1. Use html-version if explicitly set
+ // 2. Otherwise use version (W3C spec: version controls HTML version for html method)
+ // 3. Default to 5.0
double htmlVersion;
- try {
- htmlVersion = Double.parseDouble(htmlVersionProp);
- } catch (NumberFormatException e) {
- htmlVersion = 1.0;
+ final String explicitHtmlVersion = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION);
+ if (explicitHtmlVersion != null) {
+ try {
+ htmlVersion = Double.parseDouble(explicitHtmlVersion);
+ } catch (NumberFormatException e) {
+ htmlVersion = 5.0;
+ }
+ } else if (("html".equalsIgnoreCase(method) || "xhtml".equalsIgnoreCase(method))
+ && outputProperties.getProperty(OutputKeys.VERSION) != null) {
+ try {
+ htmlVersion = Double.parseDouble(outputProperties.getProperty(OutputKeys.VERSION));
+ } catch (NumberFormatException e) {
+ htmlVersion = 5.0;
+ }
+ } else {
+ htmlVersion = 5.0;
}
final SerializerWriter baseSerializerWriter = getBaseSerializerWriter(method, htmlVersion);
diff --git a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
index 22ab6dfca23..717ec83ab07 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
@@ -190,10 +190,15 @@ private void writeAtomic(AtomicValue value) throws IOException, SAXException, XP
}
private void writeDouble(final DoubleValue item) throws SAXException {
- final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US);
- symbols.setExponentSeparator("e");
- final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols);
- writeText(df.format(item.getDouble()));
+ final double d = item.getDouble();
+ if (Double.isInfinite(d) || Double.isNaN(d)) {
+ writeText(item.getStringValue());
+ } else {
+ final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US);
+ symbols.setExponentSeparator("e");
+ final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols);
+ writeText(df.format(d));
+ }
}
private void writeArray(final ArrayType array) throws XPathException, SAXException, TransformerException {
@@ -215,9 +220,7 @@ private void writeArray(final ArrayType array) throws XPathException, SAXExcepti
private void writeMap(final AbstractMapType map) throws SAXException, XPathException, TransformerException {
try {
- writer.write("map");
- addSpaceIfIndent();
- writer.write('{');
+ writer.write("map{");
addIndent();
indent();
for (final Iterator> i = map.iterator(); i.hasNext(); ) {
diff --git a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
index 1dffc3029b7..bc69c4304c6 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
@@ -246,6 +246,23 @@ protected void closeStartTag(boolean isEmpty) throws TransformerException {
}
}
+    /**
+     * Serializes a processing instruction in HTML syntax: {@code <?target data>}.
+     * The instruction is terminated by a bare {@code >}; a null or empty
+     * {@code data} produces just {@code <?target>}.
+     *
+     * @param target the processing instruction target
+     * @param data   the instruction content, may be null or empty
+     * @throws TransformerException if closing the pending start tag fails or
+     *                              the underlying writer reports an I/O error
+     */
+    @Override
+    public void processingInstruction(String target, String data) throws TransformerException {
+        try {
+            // finish a start tag that may still be open before emitting the PI
+            closeStartTag(false);
+            final Writer writer = getWriter();
+            // opening delimiter; without it the output would be "target data>"
+            writer.write("<?");
+            writer.write(target);
+            if (data != null && !data.isEmpty()) {
+                writer.write(' ');
+                writer.write(data);
+            }
+            writer.write('>');
+        } catch (IOException e) {
+            throw new TransformerException(e.getMessage(), e);
+        }
+    }
+
@Override
protected boolean needsEscape(char ch) {
if (RAW_TEXT_ELEMENTS.contains(currentTag)) {
@@ -253,4 +270,20 @@ protected boolean needsEscape(char ch) {
}
return super.needsEscape(ch);
}
+
+    /**
+     * Decides whether {@code ch} has to be escaped.
+     * Attribute values always answer {@code true}; text inside a raw text
+     * element (see {@code RAW_TEXT_ELEMENTS}) always answers {@code false};
+     * any other text is delegated to the superclass.
+     */
+    @Override
+    protected boolean needsEscape(final char ch, final boolean inAttribute) {
+        if (inAttribute) {
+            // attributes never take the raw-text shortcut, not even on script/style
+            return true;
+        }
+        if (RAW_TEXT_ELEMENTS.contains(currentTag)) {
+            // text content of a raw text element is written unescaped
+            return false;
+        }
+        return super.needsEscape(ch, false);
+    }
+
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
index c336d8b2943..99df54c3e19 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
@@ -25,7 +25,9 @@
import java.io.Writer;
import java.util.ArrayDeque;
import java.util.Deque;
+import java.util.HashSet;
import java.util.Properties;
+import java.util.Set;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
@@ -48,6 +50,8 @@ public class IndentingXMLWriter extends XMLWriter {
private boolean sameline = false;
private boolean whitespacePreserve = false;
private final Deque whitespacePreserveStack = new ArrayDeque<>();
+ private Set suppressIndentation = null;
+ private int suppressIndentDepth = 0;
public IndentingXMLWriter() {
super();
@@ -75,6 +79,9 @@ public void startElement(final String namespaceURI, final String localName, fina
indent();
}
super.startElement(namespaceURI, localName, qname);
+ if (isSuppressIndentation(localName)) {
+ suppressIndentDepth++;
+ }
addIndent();
afterTag = true;
sameline = true;
@@ -86,6 +93,9 @@ public void startElement(final QName qname) throws TransformerException {
indent();
}
super.startElement(qname);
+ if (isSuppressIndentation(qname.getLocalPart())) {
+ suppressIndentDepth++;
+ }
addIndent();
afterTag = true;
sameline = true;
@@ -95,6 +105,9 @@ public void startElement(final QName qname) throws TransformerException {
public void endElement(final String namespaceURI, final String localName, final String qname) throws TransformerException {
endIndent(namespaceURI, localName);
super.endElement(namespaceURI, localName, qname);
+ if (isSuppressIndentation(localName) && suppressIndentDepth > 0) {
+ suppressIndentDepth--;
+ }
popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element
sameline = isInlineTag(namespaceURI, localName);
afterTag = true;
@@ -104,6 +117,9 @@ public void endElement(final String namespaceURI, final String localName, final
public void endElement(final QName qname) throws TransformerException {
endIndent(qname.getNamespaceURI(), qname.getLocalPart());
super.endElement(qname);
+ if (isSuppressIndentation(qname.getLocalPart()) && suppressIndentDepth > 0) {
+ suppressIndentDepth--;
+ }
popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element
sameline = isInlineTag(qname.getNamespaceURI(), qname.getLocalPart());
afterTag = true;
@@ -164,7 +180,29 @@ public void setOutputProperties(final Properties properties) {
} catch (final NumberFormatException e) {
LOG.warn("Invalid indentation value: '{}'", option);
}
- indent = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"));
+ final String indentValue = outputProperties.getProperty(OutputKeys.INDENT, "no").trim();
+ indent = "yes".equals(indentValue) || "true".equals(indentValue) || "1".equals(indentValue);
+ final String suppressProp = outputProperties.getProperty("suppress-indentation");
+ if (suppressProp != null && !suppressProp.isEmpty()) {
+ suppressIndentation = new HashSet<>();
+ for (final String name : suppressProp.split("\\s+")) {
+ if (!name.isEmpty()) {
+ // Handle URI-qualified names: Q{ns}local or {ns}local → extract local part
+ if (name.startsWith("Q{") || name.startsWith("{")) {
+ final int closeBrace = name.indexOf('}');
+ if (closeBrace > 0 && closeBrace < name.length() - 1) {
+ suppressIndentation.add(name.substring(closeBrace + 1));
+ } else {
+ suppressIndentation.add(name);
+ }
+ } else {
+ suppressIndentation.add(name);
+ }
+ }
+ }
+ } else {
+ suppressIndentation = null;
+ }
}
@Override
@@ -220,8 +258,12 @@ protected void addSpaceIfIndent() throws IOException {
writer.write(' ');
}
+    /**
+     * Checks whether {@code localName} is listed in the suppress-indentation set.
+     *
+     * @param localName local part of the element name to test
+     * @return {@code false} when no set is configured, otherwise the membership result
+     */
+    private boolean isSuppressIndentation(final String localName) {
+        if (suppressIndentation == null) {
+            return false;
+        }
+        return suppressIndentation.contains(localName);
+    }
+
protected void indent() throws TransformerException {
- if (!indent || whitespacePreserve) {
+ if (!indent || whitespacePreserve || suppressIndentDepth > 0) {
return;
}
final int spaces = indentAmount * level;
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
index e89e7119d19..4894c0162af 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
@@ -24,6 +24,7 @@
import java.io.Writer;
import javax.xml.transform.TransformerException;
+import org.exist.storage.serializers.EXistOutputKeys;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.fastutil.objects.ObjectSet;
@@ -128,7 +129,45 @@ protected void writeDoctype(String rootElement) throws TransformerException {
return;
}
- documentType("html", null, null);
+ // Canonical serialization: never output DOCTYPE
+ final String canonicalProp = outputProperties != null
+ ? outputProperties.getProperty(EXistOutputKeys.CANONICAL) : null;
+ if ("yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp)) {
+ doctypeWritten = true;
+ return;
+ }
+
+ // Only output DOCTYPE when the root element is <html> (case-insensitive)
+ // Per W3C Serialization: DOCTYPE is for the html element only, not fragments
+ final String localName = rootElement.contains(":") ? rootElement.substring(rootElement.indexOf(':') + 1) : rootElement;
+ if (!"html".equalsIgnoreCase(localName)) {
+ doctypeWritten = true; // suppress future attempts
+ return;
+ }
+
+ final String publicId = outputProperties != null
+ ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_PUBLIC) : null;
+ final String systemId = outputProperties != null
+ ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_SYSTEM) : null;
+ final String method = outputProperties != null
+ ? outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD, "xhtml") : "xhtml";
+
+ if ("xhtml".equalsIgnoreCase(method)) {
+ // XHTML: per W3C spec section 5.2, only output doctype-public when
+ // doctype-system is also present
+ if (systemId != null) {
+ documentType("html", publicId, systemId);
+ } else if (publicId == null) {
+ // Neither set — simple DOCTYPE
+ documentType("html", null, null);
+ } else {
+ // doctype-public without doctype-system — suppress DOCTYPE for XHTML
+ doctypeWritten = true;
+ }
+ } else {
+ // HTML method: pass through doctype-public and doctype-system as set
+ documentType("html", publicId, systemId);
+ }
doctypeWritten = true;
}
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
index b0006f7f51c..9238cd1e848 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
@@ -23,6 +23,7 @@
import java.io.IOException;
import java.io.Writer;
+import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
@@ -36,12 +37,35 @@
*/
public class XHTMLWriter extends IndentingXMLWriter {
+ /**
+ * HTML boolean attributes per HTML 4.01 and HTML5 spec.
+ * When method="html" and the attribute value equals the attribute name
+ * (case-insensitive), the attribute is minimized to just the name.
+ */
+ protected static final ObjectSet BOOLEAN_ATTRIBUTES = new ObjectOpenHashSet<>(31);
+ static {
+ BOOLEAN_ATTRIBUTES.add("checked");
+ BOOLEAN_ATTRIBUTES.add("compact");
+ BOOLEAN_ATTRIBUTES.add("declare");
+ BOOLEAN_ATTRIBUTES.add("defer");
+ BOOLEAN_ATTRIBUTES.add("disabled");
+ BOOLEAN_ATTRIBUTES.add("ismap");
+ BOOLEAN_ATTRIBUTES.add("multiple");
+ BOOLEAN_ATTRIBUTES.add("nohref");
+ BOOLEAN_ATTRIBUTES.add("noresize");
+ BOOLEAN_ATTRIBUTES.add("noshade");
+ BOOLEAN_ATTRIBUTES.add("nowrap");
+ BOOLEAN_ATTRIBUTES.add("readonly");
+ BOOLEAN_ATTRIBUTES.add("selected");
+ }
+
protected static final ObjectSet EMPTY_TAGS = new ObjectOpenHashSet<>(31);
static {
EMPTY_TAGS.add("area");
EMPTY_TAGS.add("base");
EMPTY_TAGS.add("br");
EMPTY_TAGS.add("col");
+ EMPTY_TAGS.add("embed");
EMPTY_TAGS.add("hr");
EMPTY_TAGS.add("img");
EMPTY_TAGS.add("input");
@@ -88,6 +112,8 @@ public class XHTMLWriter extends IndentingXMLWriter {
}
protected String currentTag;
+ protected boolean inHead = false;
+ protected boolean contentTypeMetaWritten = false;
protected final ObjectSet emptyTags;
protected final ObjectSet inlineTags;
@@ -120,78 +146,121 @@ public XHTMLWriter(final Writer writer, ObjectSet emptyTags, ObjectSet 0 && namespaceURI != null && namespaceURI.equals(Namespaces.XHTML_NS)) {
- haveCollapsedXhtmlPrefix = true;
- return qname.substring(pos+1);
-
+ if (pos > 0 && namespaceURI != null) {
+ if (namespaceURI.equals(Namespaces.XHTML_NS)) {
+ haveCollapsedXhtmlPrefix = true;
+ return qname.substring(pos + 1);
+ }
+ // XHTML5: normalize SVG and MathML prefixes
+ if (isHtml5Version() && (namespaceURI.equals(SVG_NS) || namespaceURI.equals(MATHML_NS))) {
+ collapsedForeignNs = namespaceURI;
+ return qname.substring(pos + 1);
+ }
}
-
return qname;
}
@Override
public void namespace(final String prefix, final String nsURI) throws TransformerException {
- if(haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) {
- return; //dont output the xmlns:prefix for the collapsed nodes prefix
+ if (haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) {
+ return; // don't output the xmlns:prefix for the collapsed node's prefix
+ }
+ // When a foreign namespace prefix was collapsed, replace the prefixed
+ // declaration with a default namespace declaration
+ if (collapsedForeignNs != null && prefix != null && !prefix.isEmpty()
+ && nsURI.equals(collapsedForeignNs)) {
+ super.namespace("", nsURI); // emit xmlns="..." instead of xmlns:prefix="..."
+ return;
}
-
super.namespace(prefix, nsURI);
}
@@ -200,9 +269,25 @@ public void namespace(final String prefix, final String nsURI) throws Transforme
protected void closeStartTag(final boolean isEmpty) throws TransformerException {
try {
if (tagIsOpen) {
+ // Flush canonical buffers (sorted namespaces + attributes) if active
+ if (isCanonical()) {
+ flushCanonicalBuffersXhtml();
+ }
if (isEmpty) {
- if (isEmptyTag(currentTag)) {
- getWriter().write(" />");
+ if (isCanonical()) {
+ // Canonical: always expand empty elements
+ getWriter().write('>');
+ getWriter().write("");
+ getWriter().write(currentTag);
+ getWriter().write('>');
+ } else if (isEmptyTag(currentTag)) {
+ // For method="html", use HTML-style void tags (<br>)
+ // For method="xhtml", use XHTML-style (<br />)
+ if (isHtmlMethod()) {
+ getWriter().write(">");
+ } else {
+ getWriter().write(" />");
+ }
} else {
getWriter().write('>');
getWriter().write("");
@@ -218,10 +303,159 @@ protected void closeStartTag(final boolean isEmpty) throws TransformerException
throw new TransformerException(ioe.getMessage(), ioe);
}
}
+
+    /**
+     * Tells whether the configured output method is "html" rather than "xhtml".
+     * The comparison ignores case; when no output properties are set the
+     * answer is {@code false}.
+     * HTML uses void element syntax (&lt;br&gt;) while XHTML uses self-closing (&lt;br /&gt;).
+     */
+    private boolean isHtmlMethod() {
+        return outputProperties != null
+                && "html".equalsIgnoreCase(outputProperties.getProperty(OutputKeys.METHOD));
+    }
+
+    /**
+     * Tells whether the {@code version} output property denotes 5.0 or higher.
+     * Missing output properties, a missing value, or a value that does not
+     * parse as a number all count as HTML5.
+     */
+    private boolean isHtml5Version() {
+        final String version = outputProperties == null
+                ? null
+                : outputProperties.getProperty(OutputKeys.VERSION);
+        if (version == null) {
+            return true; // nothing configured: default to HTML5
+        }
+        try {
+            return Double.parseDouble(version) >= 5.0;
+        } catch (final NumberFormatException ignored) {
+            // unparsable version: fall back to the HTML5 default
+            return true;
+        }
+    }
+    /**
+     * Writes an attribute. For method="html", a boolean attribute whose value
+     * equals its own name (ignoring case) is minimized to the bare name;
+     * every other attribute is handled by the superclass.
+     *
+     * @param qname the attribute name
+     * @param value the attribute value
+     * @throws TransformerException if writing fails
+     */
+    @Override
+    public void attribute(final QName qname, final CharSequence value) throws TransformerException {
+        if (isHtmlMethod() && isBooleanAttribute(qname.getLocalPart(), value)) {
+            writeMinimizedAttribute(qname.getLocalPart(), value);
+            return;
+        }
+        super.attribute(qname, value);
+    }
+
+    /**
+     * String-name variant of {@link #attribute(QName, CharSequence)}; the
+     * name is tested and written exactly as passed in.
+     *
+     * @param qname the attribute name
+     * @param value the attribute value
+     * @throws TransformerException if writing fails
+     */
+    @Override
+    public void attribute(final String qname, final CharSequence value) throws TransformerException {
+        if (isHtmlMethod() && isBooleanAttribute(qname, value)) {
+            writeMinimizedAttribute(qname, value);
+            return;
+        }
+        super.attribute(qname, value);
+    }
+
+    /**
+     * Emits a boolean attribute in minimized form (a space followed by the
+     * name, without ="value"). When no start tag is open the value is passed
+     * to {@code characters} instead.
+     */
+    private void writeMinimizedAttribute(final String name, final CharSequence value) throws TransformerException {
+        if (!tagIsOpen) {
+            characters(value);
+            return;
+        }
+        try {
+            final Writer w = getWriter();
+            w.write(' ');
+            w.write(name);
+        } catch (final IOException ioe) {
+            throw new TransformerException(ioe.getMessage(), ioe);
+        }
+    }
+
+    /**
+     * Returns true when {@code attrName} (lower-cased) is in
+     * {@code BOOLEAN_ATTRIBUTES} and {@code value} equals the name ignoring case.
+     * A null value never matches, so it is passed on to the superclass unchanged.
+     */
+    private boolean isBooleanAttribute(final String attrName, final CharSequence value) {
+        return value != null
+                && BOOLEAN_ATTRIBUTES.contains(attrName.toLowerCase(java.util.Locale.ROOT))
+                && attrName.equalsIgnoreCase(value.toString());
+    }
+
+ private static final ObjectSet RAW_TEXT_ELEMENTS_HTML = new ObjectOpenHashSet<>(4);
+ static {
+ RAW_TEXT_ELEMENTS_HTML.add("script");
+ RAW_TEXT_ELEMENTS_HTML.add("style");
+ }
+
+    /**
+     * Text inside script or style is left unescaped when the output method is
+     * "html"; every other case is decided by the superclass.
+     */
+    @Override
+    protected boolean needsEscape(final char ch, final boolean inAttribute) {
+        final boolean rawText = !inAttribute
+                && isHtmlMethod()
+                && currentTag != null
+                && RAW_TEXT_ELEMENTS_HTML.contains(currentTag.toLowerCase(java.util.Locale.ROOT));
+        // the superclass is consulted only when the raw-text shortcut does not apply
+        return !rawText && super.needsEscape(ch, inAttribute);
+    }
+
+    /**
+     * CDATA sections are never used when the output method is "html";
+     * for any other method the superclass decides.
+     */
+    @Override
+    protected boolean shouldUseCdataSections() {
+        return !isHtmlMethod() && super.shouldUseCdataSections();
+    }
+
+ @Override
+ protected boolean escapeAmpersandBeforeBrace() {
+ // HTML spec: & before { in attribute values should not be escaped
+ return false;
+ }
+
@Override
protected boolean isInlineTag(final String namespaceURI, final String localName) {
return (namespaceURI == null || namespaceURI.isEmpty() || Namespaces.XHTML_NS.equals(namespaceURI))
&& inlineTags.contains(localName);
}
+
+ /**
+ * Write a meta content-type tag as the first child of head when
+ * include-content-type is enabled (the default per W3C Serialization 3.1).
+ */
+ protected void writeContentTypeMeta() throws TransformerException {
+ if (contentTypeMetaWritten || outputProperties == null) {
+ return;
+ }
+ final String includeContentType = outputProperties.getProperty("include-content-type", "yes");
+ if (!"yes".equals(includeContentType)) {
+ return;
+ }
+ contentTypeMetaWritten = true;
+ try {
+ final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8");
+ closeStartTag(false);
+ final Writer writer = getWriter();
+
+ // HTML5 method uses <meta charset="...">
+ // XHTML and HTML4 use <meta http-equiv="Content-Type" content="...">
+ // XHTML mode requires self-closing tags (/>) for valid XML output —
+ // the URL rewrite pipeline re-parses this as XML in the view step.
+ final boolean selfClose = !isHtmlMethod();
+ if (isHtmlMethod() && isHtml5Version()) {
+ writer.write("" : "\">");
+ } else {
+ final String mediaType = outputProperties.getProperty(OutputKeys.MEDIA_TYPE, "text/html");
+ writer.write("" : "\">");
+ }
+ } catch (IOException e) {
+ throw new TransformerException(e.getMessage(), e);
+ }
+ }
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
index 763aaf52ef6..48887f88e13 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
@@ -78,6 +78,11 @@ public class XMLWriter implements SerializerWriter {
private String defaultNamespace = "";
+ // Namespace stack (BaseX-style): flat list of (prefix, uri) pairs for all in-scope bindings.
+ // nstack records the list size at each startElement so endElement can roll back declarations.
+ private final List nspaces = new ArrayList<>();
+ private final Deque nstack = new ArrayDeque<>();
+
/**
* When serializing an XDM this should be true,
* otherwise false.
@@ -86,8 +91,33 @@ public class XMLWriter implements SerializerWriter {
* compared to retrieving resources from the database.
*/
private boolean xdmSerialization = false;
+ private boolean xml11 = false;
+ private boolean canonical = false;
+ @Nullable private java.text.Normalizer.Form normalizationForm = null;
+
+ // Canonical XML: buffer namespaces and attributes for sorting
+ private final List canonicalNamespaces = new ArrayList<>(); // [prefix, uri]
+ private final List canonicalAttributes = new ArrayList<>(); // [nsUri, localName, qname, value]
private final Deque elementName = new ArrayDeque<>();
+
+ /**
+ * Returns true if cdata-section-elements should be applied.
+ * Subclasses (e.g., XHTMLWriter for HTML method) can override
+ * to suppress CDATA sections.
+ */
+ protected boolean shouldUseCdataSections() {
+ return xdmSerialization;
+ }
+
+    /**
+     * Reports the namespace URI of the element on top of the element-name
+     * stack, i.e. the innermost element.
+     *
+     * @return that element's namespace URI, or {@code null} when the stack is empty
+     */
+    protected String currentElementNamespaceURI() {
+        if (elementName.isEmpty()) {
+            return null;
+        }
+        return elementName.peek().getNamespaceURI();
+    }
private LazyVal> cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames);
private boolean cdataSetionElement = false;
@@ -96,8 +126,9 @@ public class XMLWriter implements SerializerWriter {
Arrays.fill(textSpecialChars, false);
textSpecialChars['<'] = true;
textSpecialChars['>'] = true;
- // textSpecialChars['\r'] = true;
+ textSpecialChars['\r'] = true;
textSpecialChars['&'] = true;
+ textSpecialChars[0x7F] = true; // DEL must be escaped as &#x7F;
attrSpecialChars = new boolean[128];
Arrays.fill(attrSpecialChars, false);
@@ -108,6 +139,7 @@ public class XMLWriter implements SerializerWriter {
attrSpecialChars['\t'] = true;
attrSpecialChars['&'] = true;
attrSpecialChars['"'] = true;
+ attrSpecialChars[0x7F] = true; // DEL must be escaped as &#x7F;
}
@Nullable private XMLDeclaration originalXmlDecl;
@@ -139,6 +171,10 @@ public void setOutputProperties(final Properties properties) {
}
this.xdmSerialization = "yes".equals(outputProperties.getProperty(EXistOutputKeys.XDM_SERIALIZATION, "no"));
+ this.xml11 = "1.1".equals(outputProperties.getProperty(OutputKeys.VERSION));
+ this.normalizationForm = parseNormalizationForm(outputProperties.getProperty("normalization-form", "none"));
+ final String canonicalProp = outputProperties.getProperty(EXistOutputKeys.CANONICAL);
+ this.canonical = "yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp);
}
private Set parseCdataSectionElementNames() {
@@ -166,6 +202,8 @@ protected void resetObjectState() {
originalXmlDecl = null;
doctypeWritten = false;
defaultNamespace = "";
+ nspaces.clear();
+ nstack.clear();
cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames);
}
@@ -184,12 +222,35 @@ public Writer getWriter() {
}
public String getDefaultNamespace() {
- return defaultNamespace.isEmpty() ? null : defaultNamespace;
+ final String fromStack = nsLookup("");
+ return (fromStack == null || fromStack.isEmpty()) ? null : fromStack;
}
public void setDefaultNamespace(final String namespace) {
+ // Keep the baseline field in sync; nsLookup() falls back to it when the
+ // namespace stack has no in-scope binding for the default prefix.
defaultNamespace = namespace == null ? "" : namespace;
}
+
+    /**
+     * Resolves the URI currently bound to {@code prefix}.
+     * The flat {@code nspaces} list stores (prefix, uri) pairs; it is walked
+     * from the end so that the most recently recorded binding wins. When no
+     * pair matches and {@code prefix} is the empty string, the
+     * {@link #defaultNamespace} field serves as the fallback.
+     *
+     * @param prefix the prefix to resolve; {@code ""} denotes the default namespace
+     * @return the in-scope URI, or {@code null} if {@code prefix} is unbound
+     */
+    private String nsLookup(final String prefix) {
+        int idx = nspaces.size() - 2;
+        while (idx >= 0) {
+            if (nspaces.get(idx).equals(prefix)) {
+                return nspaces.get(idx + 1);
+            }
+            idx -= 2; // step back one (prefix, uri) pair
+        }
+        // nothing on the stack: only the default prefix has a baseline to fall back on
+        if (!prefix.isEmpty() || defaultNamespace.isEmpty()) {
+            return null;
+        }
+        return defaultNamespace;
+    }
public void startDocument() throws TransformerException {
resetObjectState();
@@ -207,15 +268,16 @@ public void startElement(final String namespaceUri, final String localName, fina
if(!declarationWritten) {
writeDeclaration();
}
-
+
if(!doctypeWritten) {
writeDoctype(qname);
}
-
+
try {
if(tagIsOpen) {
closeStartTag(false);
}
+ nstack.push(nspaces.size());
writer.write('<');
writer.write(qname);
tagIsOpen = true;
@@ -233,21 +295,22 @@ public void startElement(final QName qname) throws TransformerException {
if(!declarationWritten) {
writeDeclaration();
}
-
+
if(!doctypeWritten) {
writeDoctype(qname.getStringValue());
}
-
+
try {
if(tagIsOpen) {
closeStartTag(false);
}
+ nstack.push(nspaces.size());
writer.write('<');
if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) {
writer.write(qname.getPrefix());
writer.write(':');
}
-
+
writer.write(qname.getLocalPart());
tagIsOpen = true;
elementName.push(qname);
@@ -266,6 +329,9 @@ public void endElement(final String namespaceURI, final String localName, final
writer.write('>');
}
elementName.pop();
+ if (!nstack.isEmpty()) {
+ nspaces.subList(nstack.pop(), nspaces.size()).clear();
+ }
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
@@ -285,40 +351,74 @@ public void endElement(final QName qname) throws TransformerException {
writer.write('>');
}
elementName.pop();
+ if (!nstack.isEmpty()) {
+ nspaces.subList(nstack.pop(), nspaces.size()).clear();
+ }
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
}
public void namespace(final String prefix, final String nsURI) throws TransformerException {
- if((nsURI == null) && (prefix == null || prefix.isEmpty())) {
+ final String normPrefix = prefix != null ? prefix : "";
+ final String normUri = nsURI != null ? nsURI : "";
+
+ // The xml namespace is implicitly declared and never needs explicit serialization
+ if ("xml".equals(normPrefix)) {
return;
}
- try {
- if(!tagIsOpen) {
+ try {
+ if (!tagIsOpen) {
+ // An xmlns="" outside a start tag is harmless — just skip it
+ if (normUri.isEmpty() && normPrefix.isEmpty()) {
+ return;
+ }
throw new TransformerException("Found a namespace declaration outside an element");
}
- if(prefix != null && !prefix.isEmpty()) {
- writer.write(' ');
- writer.write("xmlns");
- writer.write(':');
- writer.write(prefix);
- writer.write("=\"");
- writeChars(nsURI, true);
- writer.write('"');
- } else {
- if(defaultNamespace.equals(nsURI)) {
- return;
+ if (canonical) {
+ // Buffer for sorting — emitted in closeStartTag
+ // Validate: reject relative namespace URIs (SERE0024)
+ if (!normUri.isEmpty() && isRelativeUri(normUri)) {
+ throw new TransformerException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + normUri);
}
- writer.write(' ');
- writer.write("xmlns");
+ if (normPrefix.isEmpty() && normUri.isEmpty()) {
+ return; // Skip xmlns="" in canonical (not meaningful for no-namespace elements)
+ }
+ // Deduplicate: replace existing binding for same prefix
+ canonicalNamespaces.removeIf(ns -> ns[0].equals(normPrefix));
+ canonicalNamespaces.add(new String[]{normPrefix, normUri});
+ // Track in namespace stack so getDefaultNamespace() stays accurate
+ nspaces.add(normPrefix);
+ nspaces.add(normUri);
+ return;
+ }
+
+ // Look up what is currently in scope for this prefix.
+ // nsLookup scans nspaces from innermost to outermost and falls back to the
+ // defaultNamespace baseline field for the default-namespace prefix.
+ final String inScope = nsLookup(normPrefix);
+ final String effective = inScope != null ? inScope : "";
+ if (normUri.equals(effective)) {
+ return; // Binding unchanged — no declaration needed
+ }
+
+ // Record the new binding so descendants can see it via nsLookup
+ nspaces.add(normPrefix);
+ nspaces.add(normUri);
+
+ // Write the namespace declaration
+ writer.write(' ');
+ if (normPrefix.isEmpty()) {
+ writer.write("xmlns=\"");
+ } else {
+ writer.write("xmlns:");
+ writer.write(normPrefix);
writer.write("=\"");
- writeChars(nsURI, true);
- writer.write('"');
- defaultNamespace= nsURI;
}
+ writeChars(normUri, true);
+ writer.write('"');
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
@@ -329,8 +429,13 @@ public void attribute(String qname, CharSequence value) throws TransformerExcept
if(!tagIsOpen) {
characters(value);
return;
- // throw new TransformerException("Found an attribute outside an
- // element");
+ }
+ if (canonical) {
+ // Buffer for sorting — extract namespace URI from qname if prefixed
+ final int colon = qname.indexOf(':');
+ final String nsUri = colon > 0 ? "" : ""; // string qname doesn't carry namespace
+ canonicalAttributes.add(new String[]{nsUri, colon > 0 ? qname.substring(colon + 1) : qname, qname, value.toString()});
+ return;
}
writer.write(' ');
writer.write(qname);
@@ -347,8 +452,18 @@ public void attribute(final QName qname, final CharSequence value) throws Transf
if(!tagIsOpen) {
characters(value);
return;
- // throw new TransformerException("Found an attribute outside an
- // element");
+ }
+ if (canonical) {
+ final String nsUri = qname.getNamespaceURI() != null ? qname.getNamespaceURI() : "";
+ final String localName = qname.getLocalPart();
+ final String fullName;
+ if (qname.getPrefix() != null && !qname.getPrefix().isEmpty()) {
+ fullName = qname.getPrefix() + ":" + localName;
+ } else {
+ fullName = localName;
+ }
+ canonicalAttributes.add(new String[]{nsUri, localName, fullName, value.toString()});
+ return;
}
writer.write(' ');
if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) {
@@ -373,12 +488,68 @@ public void characters(final CharSequence chars) throws TransformerException {
if(tagIsOpen) {
closeStartTag(false);
}
- writeChars(chars, false);
+ // When xdmSerialization is active and current element is in cdata-section-elements,
+ // wrap text content in CDATA instead of escaping it (per W3C Serialization 3.1)
+ if (shouldUseCdataSections() && !elementName.isEmpty()
+ && cdataSectionElements.get().contains(elementName.peek())) {
+ writeCdataContent(chars);
+ } else {
+ writeChars(chars, false);
+ }
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
}
+ private void writeCdataContent(final CharSequence chars) throws IOException {
+ // CDATA sections must be split when:
+ // 1. The content contains "]]>" (which would end the CDATA prematurely)
+ // 2. A character cannot be represented in the output encoding (must be escaped as NN;)
+ final String s = normalize(chars).toString();
+ boolean inCdata = false;
+ for (int i = 0; i < s.length(); ) {
+ final int cp = s.codePointAt(i);
+ final int cpLen = Character.charCount(cp);
+
+ // Check for "]]>" sequence
+ if (cp == ']' && i + 2 < s.length() && s.charAt(i + 1) == ']' && s.charAt(i + 2) == '>') {
+ if (!inCdata) {
+ writer.write("");
+ inCdata = false;
+ i += 2; // skip "]]", the ">" will be picked up next
+ continue;
+ }
+
+ // Check if character is encodable in the output charset
+ if (!charSet.inCharacterSet((char) cp)) {
+ // Close any open CDATA section
+ if (inCdata) {
+ writer.write("]]>");
+ inCdata = false;
+ }
+ // Write as character reference
+ writer.write("");
+ writer.write(Integer.toHexString(cp));
+ writer.write(';');
+ } else {
+ // Encodable character — write inside CDATA
+ if (!inCdata) {
+ writer.write("");
+ }
+ }
+
public void characters(final char[] ch, final int start, final int len) throws TransformerException {
if(!declarationWritten) {
writeDeclaration();
@@ -510,8 +681,23 @@ public void documentType(final String name, final String publicId, final String
protected void closeStartTag(final boolean isEmpty) throws TransformerException {
try {
if(tagIsOpen) {
- if(isEmpty) {
+ if (canonical) {
+ flushCanonicalBuffers();
+ }
+ if(isEmpty && !canonical) {
+ // Canonical XML: empty elements expanded to
writer.write("/>");
+ } else if (isEmpty) {
+ // Canonical: write > for empty elements
+ writer.write('>');
+ final QName currentElem = elementName.peek();
+ writer.write("");
+ if (currentElem.getPrefix() != null && !currentElem.getPrefix().isEmpty()) {
+ writer.write(currentElem.getPrefix());
+ writer.write(':');
+ }
+ writer.write(currentElem.getLocalPart());
+ writer.write('>');
} else {
writer.write('>');
}
@@ -522,6 +708,52 @@ protected void closeStartTag(final boolean isEmpty) throws TransformerException
}
}
+ /**
+ * Reports whether this writer is currently in canonical mode.
+ *
+ * @return the value of the {@code canonical} field
+ */
+ protected boolean isCanonical() {
+ return canonical;
+ }
+
+ /**
+ * Protected entry point to {@code flushCanonicalBuffers()} that reports I/O
+ * failures as {@link TransformerException}.
+ * NOTE(review): the name suggests it exists for an XHTML writer subclass — confirm against callers.
+ *
+ * @throws TransformerException wrapping any {@link IOException} raised while flushing
+ */
+ protected void flushCanonicalBuffersXhtml() throws TransformerException {
+ try {
+ flushCanonicalBuffers();
+ } catch (final IOException ioe) {
+ // Keep the original exception as the cause so the I/O failure stays diagnosable
+ throw new TransformerException(ioe.getMessage(), ioe);
+ }
+ }
+
+    /**
+     * Writes the namespace declarations and attributes buffered for the open
+     * start tag in canonical order, then empties both buffers.
+     *
+     * Namespace entries are {@code [prefix, uri]} and are ordered by prefix, so the
+     * default namespace (empty prefix) comes first. Attribute entries are
+     * {@code [namespaceUri, localName, qualifiedName, value]} and are ordered by
+     * namespace URI, then local name.
+     *
+     * @throws IOException if the underlying writer fails
+     */
+    private void flushCanonicalBuffers() throws IOException {
+        // Namespace declarations precede attributes.
+        canonicalNamespaces.sort(java.util.Comparator.comparing((String[] decl) -> decl[0]));
+        for (final String[] decl : canonicalNamespaces) {
+            final String prefix = decl[0];
+            writer.write(' ');
+            writer.write("xmlns");
+            if (!prefix.isEmpty()) {
+                writer.write(':');
+                writer.write(prefix);
+            }
+            writer.write("=\"");
+            writeChars(decl[1], true);
+            writer.write('"');
+        }
+        canonicalNamespaces.clear();
+
+        // Attributes: namespace URI is the primary key, local name the secondary key.
+        canonicalAttributes.sort(java.util.Comparator
+                .comparing((String[] entry) -> entry[0])
+                .thenComparing((String[] entry) -> entry[1]));
+        for (final String[] entry : canonicalAttributes) {
+            writer.write(' ');
+            writer.write(entry[2]); // qualified name as it appears in the output
+            writer.write("=\"");
+            writeChars(entry[3], true);
+            writer.write('"');
+        }
+        canonicalAttributes.clear();
+    }
+
protected void writeDeclaration() throws TransformerException {
if(declarationWritten) {
return;
@@ -537,7 +769,9 @@ protected void writeDeclaration() throws TransformerException {
// get the fields of the persisted xml declaration, but overridden with any properties from the serialization properties
final String version = outputProperties.getProperty(OutputKeys.VERSION, (originalXmlDecl.version != null ? originalXmlDecl.version : DEFAULT_XML_VERSION));
final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, (originalXmlDecl.encoding != null ? originalXmlDecl.encoding : DEFAULT_XML_ENCODING));
- @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone);
+ @Nullable final String standaloneOrig = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone);
+ // "omit" means standalone should be absent from the declaration
+ @Nullable final String standalone = (standaloneOrig != null && "omit".equalsIgnoreCase(standaloneOrig.trim())) ? null : standaloneOrig;
writeDeclaration(version, encoding, standalone);
@@ -545,11 +779,15 @@ protected void writeDeclaration() throws TransformerException {
}
final String omitXmlDecl = outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
- if ("no".equals(omitXmlDecl)) {
+ @Nullable final String standaloneRaw = outputProperties.getProperty(OutputKeys.STANDALONE);
+ // "omit" means standalone should be absent from the declaration
+ @Nullable final String standalone = (standaloneRaw != null && "omit".equalsIgnoreCase(standaloneRaw.trim())) ? null : standaloneRaw;
+ // Per W3C Serialization 3.1: output declaration if omit-xml-declaration is false/no/0,
+ // or if standalone is explicitly set (the declaration is required to carry standalone)
+ if (isBooleanFalse(omitXmlDecl) || standalone != null) {
// get the fields of the declaration from the serialization properties
final String version = outputProperties.getProperty(OutputKeys.VERSION, DEFAULT_XML_VERSION);
final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, DEFAULT_XML_ENCODING);
- @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE);
writeDeclaration(version, encoding, standalone);
}
@@ -564,7 +802,15 @@ private void writeDeclaration(final String version, final String encoding, @Null
writer.write('"');
if(standalone != null) {
writer.write(" standalone=\"");
- writer.write(standalone);
+ // Normalize boolean values to yes/no for XML declaration
+ final String standaloneVal = standalone.trim();
+ if ("true".equals(standaloneVal) || "1".equals(standaloneVal)) {
+ writer.write("yes");
+ } else if ("false".equals(standaloneVal) || "0".equals(standaloneVal)) {
+ writer.write("no");
+ } else {
+ writer.write(standaloneVal);
+ }
writer.write('"');
}
writer.write("?>\n");
@@ -589,36 +835,79 @@ protected void writeDoctype(final String rootElement) throws TransformerExceptio
protected boolean needsEscape(final char ch) {
return true;
}
+
+ /**
+ * Whether an ampersand that is immediately followed by an opening curly brace
+ * should be escaped. {@code writeChars} consults this only while writing
+ * attribute values.
+ *
+ * This implementation always returns {@code true} (always escape the ampersand).
+ * Per the original note, HTML output is expected to return {@code false}
+ * per the W3C HTML serialization rules — presumably via an override in a subclass.
+ *
+ * @return {@code true} if the ampersand must be escaped
+ */
+ protected boolean escapeAmpersandBeforeBrace() {
+ return true;
+ }
+
+    /**
+     * Tests whether a serialization boolean parameter carries a "false" value.
+     * The accepted spellings are {@code "no"}, {@code "false"} and {@code "0"},
+     * compared case-sensitively after trimming surrounding whitespace
+     * (W3C Serialization 3.1).
+     *
+     * @param value the raw parameter value, may be {@code null}
+     * @return {@code true} only for one of the accepted spellings; {@code false}
+     *         for {@code null} and for every other string
+     */
+    protected static boolean isBooleanFalse(final String value) {
+        if (value == null) {
+            return false;
+        }
+        return switch (value.trim()) {
+            case "no", "false", "0" -> true;
+            default -> false;
+        };
+    }
+
+ /**
+ * Whether the given character needs escaping. Subclasses can override
+ * to suppress escaping for specific contexts (e.g., HTML raw text elements).
+ *
+ * This implementation ignores {@code inAttribute} and delegates to
+ * {@code needsEscape(char)}.
+ *
+ * @param ch the character to check
+ * @param inAttribute true if we're writing an attribute value
+ * @return whatever {@code needsEscape(ch)} returns
+ */
+ protected boolean needsEscape(final char ch, final boolean inAttribute) {
+ return needsEscape(ch);
+ }
protected void writeChars(final CharSequence s, final boolean inAttribute) throws IOException {
+ // Apply Unicode normalization if configured
+ final CharSequence text = normalize(s);
final boolean[] specialChars = inAttribute ? attrSpecialChars : textSpecialChars;
char ch = 0;
- final int len = s.length();
+ final int len = text.length();
int pos = 0, i;
while(pos < len) {
i = pos;
while(i < len) {
- ch = s.charAt(i);
+ ch = text.charAt(i);
if(ch < 128) {
if(specialChars[ch]) {
break;
+ } else if(xml11 && ch >= 0x01 && ch <= 0x1F
+ && ch != 0x09 && ch != 0x0A && ch != 0x0D) {
+ // XML 1.1: C0 control chars (except TAB, LF, CR) must be escaped
+ break;
} else {
i++;
}
} else if(!charSet.inCharacterSet(ch)) {
break;
+ } else if(ch >= 0x7F && ch <= 0x9F) {
+ // Control chars 0x7F-0x9F must be serialized as character references
+ break;
+ } else if(ch == 0x2028) {
+ // LINE SEPARATOR must be serialized as character reference
+ break;
} else {
i++;
}
}
- writeCharSeq(s, pos, i);
+ writeCharSeq(text, pos, i);
// writer.write(s.subSequence(pos, i).toString());
if (i >= len) {
return;
}
- if(needsEscape(ch)) {
+ if(needsEscape(ch, inAttribute)) {
switch(ch) {
case '<':
writer.write("<");
@@ -627,7 +916,12 @@ protected void writeChars(final CharSequence s, final boolean inAttribute) throw
writer.write(">");
break;
case '&':
- writer.write("&");
+ // HTML spec: & before { in attribute values should not be escaped
+ if (inAttribute && i + 1 < len && text.charAt(i + 1) == '{' && !escapeAmpersandBeforeBrace()) {
+ writer.write('&');
+ } else {
+ writer.write("&");
+ }
break;
case '\r':
writer.write("
");
@@ -672,6 +966,38 @@ protected void writeCharacterReference(final char charval) throws IOException {
writer.write(charref, 0, o);
}
+ @Nullable
+ private static java.text.Normalizer.Form parseNormalizationForm(final String value) {
+ if (value == null) return null;
+ return switch (value.trim().toUpperCase(java.util.Locale.ROOT)) {
+ case "NFC" -> java.text.Normalizer.Form.NFC;
+ case "NFD" -> java.text.Normalizer.Form.NFD;
+ case "NFKC" -> java.text.Normalizer.Form.NFKC;
+ case "NFKD" -> java.text.Normalizer.Form.NFKD;
+ case "NONE", "" -> null;
+ default -> null; // "fully-normalized" or unknown — treated as none
+ };
+ }
+
+    /**
+     * Applies the configured Unicode normalization form to {@code text}.
+     *
+     * @param text the characters about to be written
+     * @return {@code text} itself when no normalization form is configured or the
+     *         text is already normalized; otherwise a normalized copy
+     */
+    protected CharSequence normalize(final CharSequence text) {
+        if (normalizationForm != null) {
+            final String candidate = text.toString();
+            if (!java.text.Normalizer.isNormalized(candidate, normalizationForm)) {
+                return java.text.Normalizer.normalize(candidate, normalizationForm);
+            }
+        }
+        // Nothing to do: hand back the caller's instance untouched.
+        return text;
+    }
+
+    /**
+     * Decides whether {@code uri} lacks a scheme by looking at the first of the
+     * characters {@code : / ? #} that occurs in it.
+     *
+     * @param uri the URI text to inspect; must not be {@code null}
+     * @return {@code false} if a colon comes first (a scheme is present);
+     *         {@code true} if a slash, question mark or hash comes first, or if
+     *         none of the four characters occurs (including the empty string)
+     */
+    private static boolean isRelativeUri(final String uri) {
+        int pos = 0;
+        while (pos < uri.length()) {
+            switch (uri.charAt(pos)) {
+                case ':':
+                    return false;
+                case '/':
+                case '?':
+                case '#':
+                    return true;
+                default:
+                    pos++;
+            }
+        }
+        return true;
+    }
+
private static class XMLDeclaration {
@Nullable final String version;
@Nullable final String encoding;
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java
index 366e3866cbc..44266ea5869 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java
@@ -32,6 +32,7 @@
import org.xml.sax.SAXNotSupportedException;
import javax.xml.transform.OutputKeys;
+import java.io.IOException;
import java.io.Writer;
import java.util.Properties;
@@ -72,17 +73,167 @@ public void serialize(final Sequence sequence, final int start, final int howman
break;
case "xml":
default:
- serializeXML(sequence, start, howmany, wrap, typed, compilationTime, executionTime);
+ // For XML/text methods, flatten any arrays in the sequence before serialization
+ // (arrays can't be serialized as SAX events directly)
+ // Maps and function items cannot be serialized with XML/text methods (SENR0001)
+ validateXmlSerializable(sequence);
+ if (isCanonical()) {
+ validateCanonical(sequence);
+ }
+ final Sequence flattened = flattenArrays(sequence);
+ if (flattened != sequence) {
+ // Flattening changed the sequence — reset start/howmany to cover all items.
+ // For text method, default item-separator is space if not explicitly set.
+ if ("text".equals(method) && outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR) == null) {
+ outputProperties.setProperty(EXistOutputKeys.ITEM_SEPARATOR, " ");
+ }
+ serializeXML(flattened, 1, flattened.getItemCount(), wrap, typed, compilationTime, executionTime);
+ } else {
+ serializeXML(flattened, start, howmany, wrap, typed, compilationTime, executionTime);
+ }
break;
}
}
+    /**
+     * Rejects sequences that the XML and text output methods cannot serialize.
+     *
+     * @param sequence the items about to be serialized
+     * @throws SAXException carrying {@code err:SENR0001} when an item's type is
+     *         exactly {@code Type.MAP_ITEM} or {@code Type.FUNCTION}
+     * @throws XPathException declared for the sequence iteration calls
+     */
+    private static void validateXmlSerializable(final Sequence sequence) throws SAXException, XPathException {
+        final SequenceIterator items = sequence.iterate();
+        while (items.hasNext()) {
+            final int itemType = items.nextItem().getType();
+            final boolean serializable = itemType != Type.MAP_ITEM && itemType != Type.FUNCTION;
+            if (!serializable) {
+                throw new SAXException("err:SENR0001 Cannot serialize a " +
+                        Type.getTypeName(itemType) + " with the XML or text output method");
+            }
+        }
+    }
+
+    /**
+     * Reports whether the {@code canonical} serialization parameter is switched on.
+     *
+     * Delegates to {@code isBooleanTrue} so surrounding whitespace is tolerated, as
+     * for the other boolean parameters this class reads.
+     *
+     * @return {@code true} when the property is "yes", "true" or "1" (after trimming);
+     *         {@code false} when it is unset or has any other value
+     */
+    private boolean isCanonical() {
+        return isBooleanTrue(outputProperties.getProperty(EXistOutputKeys.CANONICAL));
+    }
+
+    /**
+     * Runs the canonical-XML checks (SERE0024) on every node in the sequence.
+     * Items that are not nodes are skipped.
+     *
+     * @param sequence the items about to be serialized
+     * @throws SAXException if {@code validateCanonicalNode} rejects a node
+     * @throws XPathException declared for the sequence iteration calls
+     */
+    private void validateCanonical(final Sequence sequence) throws SAXException, XPathException {
+        final SequenceIterator items = sequence.iterate();
+        while (items.hasNext()) {
+            final Item candidate = items.nextItem();
+            if (!Type.subTypeOf(candidate.getType(), Type.NODE)) {
+                continue;
+            }
+            validateCanonicalNode((NodeValue) candidate);
+        }
+    }
+
+    /**
+     * Applies the canonical-XML constraints to a single node.
+     *
+     * A document node must have exactly one element child, and that element's
+     * subtree is then checked for relative namespace URIs. An element node is
+     * checked for relative namespace URIs directly. Other node kinds are accepted
+     * without checks.
+     *
+     * @param node the node to validate
+     * @throws SAXException carrying {@code err:SERE0024} when a constraint is violated
+     * @throws XPathException declared for the node accessor calls
+     */
+    private void validateCanonicalNode(final NodeValue node) throws SAXException, XPathException {
+        final int nodeType = node.getType();
+        if (nodeType == Type.ELEMENT) {
+            validateCanonicalNamespaces(node.getNode());
+            return;
+        }
+        if (nodeType != Type.DOCUMENT) {
+            return;
+        }
+
+        // Count the element children and remember the first one as the root.
+        org.w3c.dom.Node rootElement = null;
+        int elementCount = 0;
+        for (org.w3c.dom.Node child = node.getNode().getFirstChild(); child != null; child = child.getNextSibling()) {
+            if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
+                elementCount++;
+                if (rootElement == null) {
+                    rootElement = child;
+                }
+            }
+        }
+        if (elementCount != 1) {
+            throw new SAXException("err:SERE0024 Canonical serialization requires a well-formed document with exactly one root element, found " + elementCount);
+        }
+        // validateCanonicalNamespaces only inspects element nodes, so passing the
+        // document node itself would skip the whole tree; start from the root element.
+        validateCanonicalNamespaces(rootElement);
+    }
+
+    /**
+     * Recursively checks an element subtree for relative namespace URIs.
+     *
+     * For every element, its own namespace URI and the values of its
+     * {@code xmlns} / {@code xmlns:prefix} attributes are tested with
+     * {@code isRelativeUri}; null and empty URIs are ignored. Nodes that are not
+     * elements are skipped together with their children.
+     *
+     * @param node the DOM node at which to start
+     * @throws SAXException carrying {@code err:SERE0024} for the first relative URI found
+     */
+    private void validateCanonicalNamespaces(final org.w3c.dom.Node node) throws SAXException {
+        if (node.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
+            return;
+        }
+
+        final String elementNs = node.getNamespaceURI();
+        if (elementNs != null && !elementNs.isEmpty() && isRelativeUri(elementNs)) {
+            throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + elementNs);
+        }
+
+        // Namespace declarations are exposed as ordinary attributes by the DOM.
+        final org.w3c.dom.NamedNodeMap attributes = node.getAttributes();
+        if (attributes != null) {
+            for (int idx = 0; idx < attributes.getLength(); idx++) {
+                final org.w3c.dom.Attr attribute = (org.w3c.dom.Attr) attributes.item(idx);
+                final String name = attribute.getName();
+                final boolean declaresNamespace = "xmlns".equals(name) || name.startsWith("xmlns:");
+                if (!declaresNamespace) {
+                    continue;
+                }
+                final String declared = attribute.getValue();
+                if (declared != null && !declared.isEmpty() && isRelativeUri(declared)) {
+                    throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + declared);
+                }
+            }
+        }
+
+        org.w3c.dom.Node child = node.getFirstChild();
+        while (child != null) {
+            validateCanonicalNamespaces(child);
+            child = child.getNextSibling();
+        }
+    }
+
+    /**
+     * Heuristic test for a URI without a scheme.
+     *
+     * Scans left to right: a colon seen before any of {@code / ? #} marks a scheme
+     * (e.g. "http://", "urn:", "file:") and the URI counts as absolute. Meeting
+     * {@code / ? #} first, or none of these characters at all (e.g. "local.ns"),
+     * counts as relative.
+     *
+     * @param uri the URI text to inspect; must not be {@code null}
+     * @return {@code true} if the URI is considered relative
+     */
+    private static boolean isRelativeUri(final String uri) {
+        for (final char c : uri.toCharArray()) {
+            if (c == ':') {
+                return false; // scheme separator reached first
+            }
+            if ("/?#".indexOf(c) >= 0) {
+                return true; // path, query or fragment starts before any scheme
+            }
+        }
+        return true; // no scheme separator anywhere
+    }
+
+    /**
+     * Replaces every array item in the sequence by its flattened members, so
+     * that each member becomes a top-level item. This is needed because the
+     * SAX-based XML/text serializer cannot handle array items.
+     *
+     * @param sequence the sequence to inspect
+     * @return {@code sequence} itself (same instance) when it contains no array
+     *         item; otherwise a new sequence with arrays expanded in place and all
+     *         other items kept in order
+     * @throws XPathException if iterating or flattening fails
+     */
+    private static Sequence flattenArrays(final Sequence sequence) throws XPathException {
+        // First pass: stop at the first array so array-free input is returned as-is.
+        boolean arrayFound = false;
+        final SequenceIterator probe = sequence.iterate();
+        while (!arrayFound && probe.hasNext()) {
+            arrayFound = probe.nextItem().getType() == Type.ARRAY_ITEM;
+        }
+        if (!arrayFound) {
+            return sequence;
+        }
+
+        // Second pass: copy items across, expanding arrays as they are met.
+        final ValueSequence expanded = new ValueSequence();
+        final SequenceIterator items = sequence.iterate();
+        while (items.hasNext()) {
+            final Item current = items.nextItem();
+            if (current.getType() != Type.ARRAY_ITEM) {
+                expanded.add(current);
+                continue;
+            }
+            final SequenceIterator members = org.exist.xquery.functions.array.ArrayType.flatten(current).iterate();
+            while (members.hasNext()) {
+                expanded.add(members.nextItem());
+            }
+        }
+        return expanded;
+    }
+
public boolean normalize() {
final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml");
return !("json".equals(method) || "adaptive".equals(method));
}
private void serializeXML(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException {
+ // Dispatcher: picks between per-item serialization with a separator and the
+ // single-pass serializeXMLDirect path.
+ final String itemSeparator = outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR);
+ // If item-separator is set and sequence has multiple items, serialize items individually
+ // with separator between them (the internal Serializer doesn't handle item-separator)
+ // The separator path is skipped when wrap is requested; note that the item count
+ // tested here is that of the whole sequence, not of the start/howmany window.
+ if (itemSeparator != null && sequence.getItemCount() > 1 && !wrap) {
+ serializeXMLWithItemSeparator(sequence, start, howmany, typed, itemSeparator);
+ } else {
+ serializeXMLDirect(sequence, start, howmany, wrap, typed, compilationTime, executionTime);
+ }
+ }
+
+ private void serializeXMLDirect(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException {
final Serializer serializer = broker.borrowSerializer();
SAXSerializer sax = null;
try {
@@ -102,11 +253,78 @@ private void serializeXML(final Sequence sequence, final int start, final int ho
}
}
+    /**
+     * Serializes the selected items one at a time, writing {@code itemSeparator}
+     * between consecutive positions.
+     *
+     * Unless {@code omit-xml-declaration} is true, one XML declaration is written
+     * for the whole output, built from the {@code version} and {@code encoding}
+     * properties (defaults "1.0" and "UTF-8"). Nodes go through a borrowed
+     * serializer with the declaration suppressed; every other item is written as
+     * its string value.
+     *
+     * @param sequence the sequence to serialize
+     * @param start 1-based position of the first item to write
+     * @param howmany maximum number of items to write
+     * @param typed passed through to the node serializer
+     * @param itemSeparator text written between consecutive positions
+     * @throws SAXException if writing fails or the serializer rejects its configuration
+     * @throws XPathException if an item cannot be read or serialized
+     */
+    private void serializeXMLWithItemSeparator(final Sequence sequence, final int start, final int howmany, final boolean typed, final String itemSeparator) throws SAXException, XPathException {
+        try {
+            // NOTE(review): the default here is "no", whereas writeDeclaration in the XML
+            // writer of this change set defaults omit-xml-declaration to "yes" — confirm.
+            if (!isBooleanTrue(outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "no"))) {
+                final String version = outputProperties.getProperty(OutputKeys.VERSION, "1.0");
+                final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8");
+                writer.write("<?xml version=\"" + version + "\" encoding=\"" + encoding + "\"?>");
+            }
+
+            final int firstIndex = start - 1; // start is 1-based, itemAt() is 0-based
+            final int endIndex = Math.min(firstIndex + howmany, sequence.getItemCount());
+            for (int i = firstIndex; i < endIndex; i++) {
+                if (i > firstIndex) {
+                    writer.write(itemSeparator);
+                }
+                final Item item = sequence.itemAt(i);
+                if (item == null) {
+                    continue;
+                }
+                if (!Type.subTypeOf(item.getType(), Type.NODE)) {
+                    writer.write(item.getStringValue());
+                    continue;
+                }
+
+                // Each node is serialized on its own, so suppress the per-node XML
+                // declaration: only the single declaration written above may appear.
+                final Properties nodeProps = new Properties(outputProperties);
+                nodeProps.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+                final Serializer serializer = broker.borrowSerializer();
+                SAXSerializer sax = null;
+                try {
+                    sax = (SAXSerializer) SerializerPool.getInstance().borrowObject(SAXSerializer.class);
+                    sax.setOutput(writer, nodeProps);
+                    serializer.setProperties(nodeProps);
+                    serializer.setSAXHandlers(sax, sax);
+                    final ValueSequence singleItem = new ValueSequence(1);
+                    singleItem.add(item);
+                    serializer.toSAX(singleItem, 1, 1, false, typed, 0, 0);
+                } catch (final SAXNotSupportedException | SAXNotRecognizedException e) {
+                    throw new SAXException(e.getMessage(), e);
+                } finally {
+                    // Always hand pooled objects back, even when serialization fails.
+                    if (sax != null) {
+                        SerializerPool.getInstance().returnObject(sax);
+                    }
+                    broker.returnSerializer(serializer);
+                }
+            }
+        } catch (final IOException e) {
+            throw new SAXException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Tests whether a serialization boolean parameter carries a "true" value.
+     *
+     * @param value the raw parameter value, may be {@code null}
+     * @return {@code true} only when the trimmed value is exactly "yes", "true"
+     *         or "1"; {@code false} for {@code null} and everything else
+     */
+    private static boolean isBooleanTrue(final String value) {
+        if (value == null) {
+            return false;
+        }
+        switch (value.trim()) {
+            case "yes":
+            case "true":
+            case "1":
+                return true;
+            default:
+                return false;
+        }
+    }
+
private void serializeJSON(final Sequence sequence, final long compilationTime, final long executionTime) throws SAXException, XPathException {
- // backwards compatibility: if the sequence contains a single element, we assume
- // it should be transformed to JSON following the rules of the old JSON writer
+ // Backwards compatibility: if the sequence contains a single element or document,
+ // use the legacy XML-to-JSON writer (which converts XML structure to JSON properties).
+ // This is needed for RESTXQ and REST API which return XML documents with method=json.
+ // Maps, arrays, atomics, and multi-item sequences go through the W3C-compliant JSONSerializer.
if (sequence.hasOne() && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) {
- serializeXML(sequence, 1, 1, false, false, compilationTime, executionTime);
+ serializeXMLDirect(sequence, 1, 1, false, false, compilationTime, executionTime);
} else {
JSONSerializer serializer = new JSONSerializer(broker, outputProperties);
serializer.serialize(sequence, writer);
diff --git a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
index bd1f01a9454..9c533df3c44 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
@@ -23,53 +23,93 @@
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.json.JsonWriteFeature;
import io.lacuna.bifurcan.IEntry;
+import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import org.exist.storage.DBBroker;
import org.exist.storage.serializers.EXistOutputKeys;
import org.exist.storage.serializers.Serializer;
+import org.exist.xquery.ErrorCodes;
import org.exist.xquery.XPathException;
import org.exist.xquery.functions.array.ArrayType;
import org.exist.xquery.functions.map.MapType;
+import org.exist.xquery.util.SerializerUtils;
import org.exist.xquery.value.*;
import org.xml.sax.SAXException;
+import javax.annotation.Nullable;
import javax.xml.transform.OutputKeys;
import java.io.IOException;
import java.io.Writer;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
import java.util.Properties;
+import java.util.Set;
/**
* Called by {@link org.exist.util.serializer.XQuerySerializer} to serialize an XQuery sequence
* to JSON. The JSON serializer differs from other serialization methods because it maps XQuery
* data items to JSON.
*
+ * Per W3C XSLT and XQuery Serialization 3.1 Section 10 (JSON Output Method).
+ *
* @author Wolf
*/
public class JSONSerializer {
private final DBBroker broker;
private final Properties outputProperties;
+ private final boolean allowDuplicateNames;
+ private final boolean canonical;
+ @Nullable private final Int2ObjectMap characterMap;
public JSONSerializer(DBBroker broker, Properties outputProperties) {
super();
this.broker = broker;
this.outputProperties = outputProperties;
+ final String canonicalProp = outputProperties.getProperty(EXistOutputKeys.CANONICAL);
+ this.canonical = isBooleanTrue(canonicalProp);
+ // Canonical mode: always reject duplicate keys
+ this.allowDuplicateNames = !canonical && "yes".equals(
+ outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"));
+ this.characterMap = SerializerUtils.getCharacterMap(outputProperties);
}
public void serialize(Sequence sequence, Writer writer) throws SAXException {
- JsonFactory factory = new JsonFactory();
+ // QT4: escape-solidus controls whether / is escaped as \/
+ // Default is "no" for XQ 3.1 compatibility (parameter doesn't exist in 3.1 spec)
+ // Canonical JSON (RFC 8785): solidus is NOT escaped
+ final boolean escapeSolidus = !canonical && isBooleanTrue(
+ outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "no"));
+ final JsonFactory factory = JsonFactory.builder()
+ .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus)
+ .build();
try {
JsonGenerator generator = factory.createGenerator(writer);
generator.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET);
- if ("yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"))) {
- generator.useDefaultPrettyPrinter();
+ if (isBooleanTrue(outputProperties.getProperty(OutputKeys.INDENT, "no"))) {
+ final int indentSpaces = Integer.parseInt(
+ outputProperties.getProperty(EXistOutputKeys.INDENT_SPACES, "4"));
+ final com.fasterxml.jackson.core.util.DefaultPrettyPrinter pp =
+ new com.fasterxml.jackson.core.util.DefaultPrettyPrinter();
+ pp.indentArraysWith(
+ com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent(
+ " ".repeat(indentSpaces)));
+ pp.indentObjectsWith(
+ com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent(
+ " ".repeat(indentSpaces)));
+ generator.setPrettyPrinter(pp);
}
- if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"))) {
- generator.enable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
+ // Duplicate detection is handled manually in serializeMap for proper SERE0022 errors
+ generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
+ final boolean jsonLines = isBooleanTrue(
+ outputProperties.getProperty(EXistOutputKeys.JSON_LINES, "no"));
+ if (jsonLines) {
+ serializeJsonLines(sequence, generator);
} else {
- generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
+ serializeSequence(sequence, generator);
}
- serializeSequence(sequence, generator);
if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.INSERT_FINAL_NEWLINE, "no"))) {
generator.writeRaw('\n');
}
@@ -79,12 +119,55 @@ public void serialize(Sequence sequence, Writer writer) throws SAXException {
}
}
+    /**
+     * JSON Lines format (NDJSON): one JSON value per line, no array wrapper.
+     * Per QT4 Serialization 4.0, when json-lines=true.
+     *
+     * Every item is rendered by its own generator into a string and appended to
+     * {@code generator} as raw text, because Jackson inserts separator whitespace
+     * between root-level values written through one generator. An empty sequence
+     * produces no output.
+     *
+     * Solidus escaping follows the same rule as {@code serialize}: off by default,
+     * and always off in canonical mode.
+     *
+     * @param sequence the items to write, one per line
+     * @param generator the target generator; receives raw text only
+     * @throws IOException if writing fails
+     * @throws XPathException if an item cannot be read
+     * @throws SAXException if an item cannot be represented as JSON
+     */
+    private void serializeJsonLines(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException {
+        if (sequence.isEmpty()) {
+            return;
+        }
+        final boolean escapeSolidus = !canonical && isBooleanTrue(
+                outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "no"));
+        // The factory configuration is the same for every line, so build it once.
+        final JsonFactory lineFactory = JsonFactory.builder()
+                .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus)
+                .build();
+        boolean first = true;
+        for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
+            if (!first) {
+                generator.writeRaw('\n');
+            }
+            first = false;
+            final java.io.StringWriter lineWriter = new java.io.StringWriter();
+            // try-with-resources closes (and flushes) the per-line generator even
+            // when serializeItem throws.
+            try (JsonGenerator lineGen = lineFactory.createGenerator(lineWriter)) {
+                lineGen.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET);
+                serializeItem(i.nextItem(), lineGen);
+            }
+            // Raw write avoids Jackson's root-value separator.
+            generator.writeRaw(lineWriter.toString());
+        }
+    }
+
private void serializeSequence(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException {
+ // Convenience overload: delegates with allowMultiItem = false, so a sequence of
+ // more than one item is rejected instead of being written as a JSON array.
+ serializeSequence(sequence, generator, false);
+ }
+
+ private void serializeSequence(Sequence sequence, JsonGenerator generator, boolean allowMultiItem) throws IOException, XPathException, SAXException {
if (sequence.isEmpty()) {
generator.writeNull();
} else if (sequence.hasOne() && "no".equals(outputProperties.getProperty(EXistOutputKeys.JSON_ARRAY_OUTPUT, "no"))) {
serializeItem(sequence.itemAt(0), generator);
+ } else if (!allowMultiItem) {
+ // SERE0023: JSON output method cannot serialize a sequence of more than one item
+ // at the top level or as a map entry value
+ throw new SAXException("err:SERE0023 Sequence of " + sequence.getItemCount()
+ + " items cannot be serialized using the JSON output method");
} else {
+ // Inside arrays, multi-item sequences become JSON arrays
generator.writeStartArray();
for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
serializeItem(i.nextItem(), generator);
@@ -99,23 +182,111 @@ private void serializeItem(Item item, JsonGenerator generator) throws IOExceptio
} else if (item.getType() == Type.MAP_ITEM) {
serializeMap((MapType) item, generator);
} else if (Type.subTypeOf(item.getType(), Type.ANY_ATOMIC_TYPE)) {
- if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) {
- generator.writeNumber(item.getStringValue());
- } else {
- switch (item.getType()) {
- case Type.BOOLEAN:
- generator.writeBoolean(((AtomicValue)item).effectiveBooleanValue());
- break;
- default:
- generator.writeString(item.getStringValue());
- break;
- }
- }
+ serializeAtomicValue(item, generator);
} else if (Type.subTypeOf(item.getType(), Type.NODE)) {
serializeNode(item, generator);
+ } else if (Type.subTypeOf(item.getType(), Type.FUNCTION)) {
+ throw new SAXException("err:SERE0021 Sequence contains a function item, which cannot be serialized as JSON");
}
}
+    /**
+     * Writes a single atomic value to the JSON generator.
+     * <p>
+     * Numeric values are written as JSON numbers, booleans as JSON booleans and
+     * every other atomic type as a JSON string (after character-map substitution).
+     *
+     * @param item the atomic item to serialize
+     * @param generator the JSON generator to write to
+     * @throws SAXException with SERE0020 if a numeric value is NaN or infinite
+     */
+    private void serializeAtomicValue(Item item, JsonGenerator generator) throws IOException, XPathException, SAXException {
+        if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) {
+            if (canonical) {
+                // RFC 8785: cast to double, use shortest representation
+                final double d = ((NumericValue) item).getDouble();
+                if (!Double.isFinite(d)) {
+                    throw new SAXException("err:SERE0020 Numeric value " + item.getStringValue()
+                            + " cannot be serialized in canonical JSON");
+                }
+                generator.writeRawValue(canonicalDoubleString(d));
+                return;
+            }
+            final String stringValue = item.getStringValue();
+            // W3C Serialization 3.1: INF, -INF, and NaN MUST raise SERE0020
+            if ("NaN".equals(stringValue) || "INF".equals(stringValue) || "-INF".equals(stringValue)) {
+                throw new SAXException("err:SERE0020 Numeric value " + stringValue
+                        + " cannot be serialized as JSON");
+            }
+            // Negative zero needs no special case: "-0" is passed through verbatim
+            // like any other numeric lexical form.
+            generator.writeNumber(stringValue);
+        } else if (item.getType() == Type.BOOLEAN) {
+            generator.writeBoolean(((AtomicValue) item).effectiveBooleanValue());
+        } else {
+            writeStringWithCharMap(generator, item.getStringValue());
+        }
+    }
+
+    /**
+     * Formats a finite double for canonical JSON output (RFC 8785).
+     * <p>
+     * Zero (of either sign) is rendered as {@code 0}. Magnitudes in
+     * [1e-6, 1e21) are rendered in plain decimal notation; everything else
+     * uses exponent notation with a lowercase {@code e}. Trailing zeros are
+     * stripped in both forms.
+     *
+     * @param value the double to format; callers reject NaN and infinities beforehand
+     * @return the formatted number
+     */
+    private static String canonicalDoubleString(final double value) {
+        if (value == 0) {
+            return "0";
+        }
+        final double magnitude = Math.abs(value);
+        // The smallest subnormal is special-cased to a single significant digit
+        if (magnitude == Double.MIN_VALUE) {
+            return value < 0 ? "-5e-324" : "5e-324";
+        }
+
+        final java.math.BigDecimal decimal = java.math.BigDecimal.valueOf(value).stripTrailingZeros();
+        final boolean plainNotation = magnitude >= 1e-6 && magnitude < 1e21;
+        return plainNotation
+                ? decimal.toPlainString()
+                : decimal.toString().replace('E', 'e');
+    }
+
+    /**
+     * Applies use-character-maps substitutions to a string value.
+     * <p>
+     * Each code point with an entry in {@code characterMap} is replaced by the
+     * mapped string; all other code points are copied unchanged. This method
+     * only builds the substituted string and performs no JSON escaping itself.
+     *
+     * @param value the string to transform
+     * @return the substituted string, or {@code value} itself when no character map is set
+     */
+    private String applyCharacterMap(final String value) {
+        final boolean noMappings = characterMap == null || characterMap.isEmpty();
+        if (noMappings) {
+            return value;
+        }
+        final StringBuilder mapped = new StringBuilder(value.length());
+        int offset = 0;
+        while (offset < value.length()) {
+            final int codePoint = value.codePointAt(offset);
+            // Advance by one or two chars depending on whether this is a surrogate pair
+            offset += Character.charCount(codePoint);
+            final String substitute = characterMap.get(codePoint);
+            if (substitute == null) {
+                mapped.appendCodePoint(codePoint);
+            } else {
+                mapped.append(substitute);
+            }
+        }
+        return mapped.toString();
+    }
+
+    /**
+     * Writes a string value to the JSON generator after applying any
+     * character map substitutions. The resulting text is always emitted via
+     * {@code writeString}, never as raw content.
+     *
+     * @param generator the JSON generator to write to
+     * @param value the string value to write
+     */
+    private void writeStringWithCharMap(final JsonGenerator generator, final String value) throws IOException {
+        final boolean hasMappings = characterMap != null && !characterMap.isEmpty();
+        final String output = hasMappings ? applyCharacterMap(value) : value;
+        generator.writeString(output);
+    }
+
+    /**
+     * Tests whether a serialization parameter value spells boolean true.
+     *
+     * @param value the raw parameter value, may be {@code null}
+     * @return {@code true} if the trimmed value is {@code yes}, {@code true} or {@code 1}
+     */
+    private static boolean isBooleanTrue(final String value) {
+        if (value == null) {
+            return false;
+        }
+        switch (value.trim()) {
+            case "yes":
+            case "true":
+            case "1":
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Tests whether a serialization parameter value spells boolean false.
+     *
+     * @param value the raw parameter value, may be {@code null}
+     * @return {@code true} if the trimmed value is {@code no}, {@code false} or {@code 0}
+     */
+    private static boolean isBooleanFalse(final String value) {
+        if (value == null) {
+            return false;
+        }
+        switch (value.trim()) {
+            case "no":
+            case "false":
+            case "0":
+                return true;
+            default:
+                return false;
+        }
+    }
+
private void serializeNode(Item item, JsonGenerator generator) throws SAXException {
final Serializer serializer = broker.borrowSerializer();
final Properties xmlOutput = new Properties();
@@ -124,7 +295,7 @@ private void serializeNode(Item item, JsonGenerator generator) throws SAXExcepti
xmlOutput.setProperty(OutputKeys.INDENT, outputProperties.getProperty(OutputKeys.INDENT, "no"));
try {
serializer.setProperties(xmlOutput);
- generator.writeString(serializer.serialize((NodeValue)item));
+ writeStringWithCharMap(generator, serializer.serialize((NodeValue)item));
} catch (IOException e) {
throw new SAXException(e.getMessage(), e);
} finally {
@@ -136,16 +307,50 @@ private void serializeArray(ArrayType array, JsonGenerator generator) throws IOE
generator.writeStartArray();
for (int i = 0; i < array.getSize(); i++) {
final Sequence member = array.get(i);
- serializeSequence(member, generator);
+ // W3C Serialization 3.1: multi-item sequences within arrays raise SERE0023
+ if (member.getItemCount() > 1) {
+ throw new SAXException("err:SERE0023 Array member at position " + (i + 1)
+ + " is a sequence of " + member.getItemCount() + " items");
+ }
+ serializeSequence(member, generator, false);
}
generator.writeEndArray();
}
private void serializeMap(MapType map, JsonGenerator generator) throws IOException, XPathException, SAXException {
generator.writeStartObject();
- for (final IEntry entry: map) {
- generator.writeFieldName(entry.key().getStringValue());
- serializeSequence(entry.value(), generator);
+ final Set seenKeys = allowDuplicateNames ? null : new HashSet<>();
+
+ // Canonical JSON (RFC 8785): sort keys by UTF-16 code unit order
+ final Iterable> entries;
+ if (canonical) {
+ final List> sorted = new ArrayList<>();
+ for (final IEntry entry : map) {
+ sorted.add(entry);
+ }
+ sorted.sort((a, b) -> {
+ try {
+ return a.key().getStringValue().compareTo(b.key().getStringValue());
+ } catch (XPathException e) {
+ return 0;
+ }
+ });
+ entries = sorted;
+ } else {
+ final List> list = new ArrayList<>();
+ for (final IEntry entry : map) {
+ list.add(entry);
+ }
+ entries = list;
+ }
+
+ for (final IEntry entry : entries) {
+ final String key = entry.key().getStringValue();
+ if (seenKeys != null && !seenKeys.add(key)) {
+ throw new SAXException("err:SERE0022 Duplicate key '" + key + "' in map and allow-duplicate-names is 'no'");
+ }
+ generator.writeFieldName(key);
+ serializeSequence(entry.value(), generator, false);
}
generator.writeEndObject();
}
diff --git a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java
index 20b94537797..82dc28ac3a3 100644
--- a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java
+++ b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java
@@ -124,9 +124,9 @@ public void addNamespaceDecl(final String name, final String uri) throws XPathEx
throw new XPathException(this, ErrorCodes.XQST0070, "'" + Namespaces.XMLNS_NS + "' can bind only to '" + XMLConstants.XMLNS_ATTRIBUTE + "' prefix");
}
- if (name != null && (!name.isEmpty()) && uri.trim().isEmpty()) {
- throw new XPathException(this, ErrorCodes.XQST0085, "cannot undeclare a prefix " + name + ".");
- }
+ // XQST0085: namespace undeclaration (xmlns:prefix="") is allowed when the
+ // implementation supports XML Names 1.1. Since eXist supports XML 1.1
+ // serialization (version="1.1"), this is no longer an error.
addNamespaceDecl(qn);
}
diff --git a/exist-core/src/main/java/org/exist/xquery/Option.java b/exist-core/src/main/java/org/exist/xquery/Option.java
index 27f8615dfdb..32c38e67dd7 100644
--- a/exist-core/src/main/java/org/exist/xquery/Option.java
+++ b/exist-core/src/main/java/org/exist/xquery/Option.java
@@ -60,7 +60,9 @@ public Option(QName qname, String contents) throws XPathException {
}
public Option(final Expression expression, QName qname, String contents) throws XPathException {
- if (qname.getPrefix() == null || qname.getPrefix().isEmpty())
+ // Options must be in a namespace: either via prefix or via URIQualifiedName Q{uri}local
+ if ((qname.getPrefix() == null || qname.getPrefix().isEmpty())
+ && (qname.getNamespaceURI() == null || qname.getNamespaceURI().isEmpty()))
{throw new XPathException(expression, "XPST0081: options must have a prefix");}
this.qname = qname;
this.contents = contents;
diff --git a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java
index 6e8105ec786..13b9a8281c0 100644
--- a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java
+++ b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java
@@ -3276,9 +3276,16 @@ protected void clearUpdateListeners() {
@Override
public void checkOptions(final Properties properties) throws XPathException {
checkLegacyOptions(properties);
+
+ // Phase 1: Process parameter-document first (provides base settings)
+ processParameterDocument(dynamicOptions, properties);
+ processParameterDocument(staticOptions, properties);
+
+ // Phase 2: Process inline options (override parameter-document settings)
if (dynamicOptions != null) {
for (final Option option : dynamicOptions) {
- if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())) {
+ if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())
+ && !"parameter-document".equals(option.getQName().getLocalPart())) {
SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties,
inScopeNamespaces::get);
}
@@ -3288,6 +3295,7 @@ public void checkOptions(final Properties properties) throws XPathException {
if (staticOptions != null) {
for (final Option option : staticOptions) {
if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())
+ && !"parameter-document".equals(option.getQName().getLocalPart())
&& !properties.containsKey(option.getQName().getLocalPart())) {
SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties,
inScopeNamespaces::get);
@@ -3296,6 +3304,55 @@ public void checkOptions(final Properties properties) throws XPathException {
}
}
+ /**
+ * Process the parameter-document serialization option if present.
+ * Loads the referenced XML file and extracts serialization parameters.
+ */
+ private void processParameterDocument(final java.util.List
"));
+ }
+
+ @Test
+ public void htmlFragmentDivNoDoctype() throws Exception {
+ final String result = serialize("
text
", "html", "5.0");
+ assertFalse("HTML div fragment should NOT have DOCTYPE: " + result,
+ result.contains("item", "html", "5.0");
+ assertFalse("HTML li fragment should NOT have DOCTYPE: " + result,
+ result.contains("
hello
",
+ "xhtml", "5.0");
+ assertTrue("XHTML document should have DOCTYPE: " + result,
+ result.contains(""));
+ }
+
+ @Test
+ public void xhtmlFragmentNoDoctype() throws Exception {
+ final String result = serialize(
+ "
hello
",
+ "xhtml", "5.0");
+ assertFalse("XHTML fragment should NOT have DOCTYPE: " + result,
+ result.contains("