From ab9d6d446a0023007ad8f51010c6ec4ddea099c5 Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:22:54 -0400
Subject: [PATCH 01/11] [bugfix] Fix serialization parameter handling for W3C
compliance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Corrects multiple issues in how serialization parameters are parsed
and validated:
- Fix type checking to accept subtypes of the declared parameter type
(e.g., xs:integer where xs:decimal is expected) and coerce xs:untypedAtomic to target type
- Accept "false", "0" as boolean false (not just "no")
- Trim whitespace in XML serialization parameter values
- Fix multi-value QName parameter cardinality check (was backwards)
- Fix standalone=omit handling, normalize boolean true/false/1/0 to yes/no
- Add SEPM0009 validation for contradictory use-character-maps
- Raise SEPM0016 (instead of SEPM0017) when a character map key is not a single character
- Add SEPM0017 validation for serialization-parameters XML element form
- Add SERE0023 validation for multi-item sequences in JSON serialization
- Accept eXist-specific parameters in XML serialization element form
(fixes regression from #3446)
- Fix fn:json-to-xml option validation for liberal/duplicates params
- Register QT4 serialization parameters: escape-solidus, json-lines,
canonical, CSV field/row/quote params
Spec: W3C Serialization 3.1 §5 (XML Output Method),
QT4 Serialization 4.0 §3.1.1 (Serialization Parameters)
XQTS: Fixes serialize-xml-*, serialize-json-* parameter validation tests
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../storage/serializers/EXistOutputKeys.java | 5 ++
.../main/java/org/exist/xquery/Option.java | 4 +-
.../xquery/functions/fn/FunSerialize.java | 71 ++++++++++++++++-
.../org/exist/xquery/functions/fn/JSON.java | 24 +++++-
.../exist/xquery/util/SerializerUtils.java | 78 +++++++++++++++----
5 files changed, 161 insertions(+), 21 deletions(-)
diff --git a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java
index ca85a06f5fe..7c727e6ab16 100644
--- a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java
+++ b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java
@@ -28,6 +28,11 @@ public class EXistOutputKeys {
*/
public static final String ITEM_SEPARATOR = "item-separator";
+ // --- QT4 Serialization 4.0 parameters ---
+ public static final String CANONICAL = "canonical";
+ public static final String ESCAPE_SOLIDUS = "escape-solidus";
+ public static final String JSON_LINES = "json-lines";
+
public static final String OMIT_ORIGINAL_XML_DECLARATION = "omit-original-xml-declaration";
public static final String OUTPUT_DOCTYPE = "output-doctype";
diff --git a/exist-core/src/main/java/org/exist/xquery/Option.java b/exist-core/src/main/java/org/exist/xquery/Option.java
index 27f8615dfdb..32c38e67dd7 100644
--- a/exist-core/src/main/java/org/exist/xquery/Option.java
+++ b/exist-core/src/main/java/org/exist/xquery/Option.java
@@ -60,7 +60,9 @@ public Option(QName qname, String contents) throws XPathException {
}
public Option(final Expression expression, QName qname, String contents) throws XPathException {
- if (qname.getPrefix() == null || qname.getPrefix().isEmpty())
+ // Options must be in a namespace: either via prefix or via URIQualifiedName Q{uri}local
+ if ((qname.getPrefix() == null || qname.getPrefix().isEmpty())
+ && (qname.getNamespaceURI() == null || qname.getNamespaceURI().isEmpty()))
{throw new XPathException(expression, "XPST0081: options must have a prefix");}
this.qname = qname;
this.contents = contents;
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java
index 24d6c89ddf6..c5df6e4d761 100644
--- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunSerialize.java
@@ -35,6 +35,8 @@
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
+import javax.xml.transform.OutputKeys;
+
import java.io.IOException;
import java.io.StringWriter;
import java.util.Properties;
@@ -80,6 +82,9 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
outputProperties = new Properties();
}
+ // SEPM0009: validate parameter consistency before serializing
+ validateSerializationParams(outputProperties);
+
try(final StringWriter writer = new StringWriter()) {
final XQuerySerializer xqSerializer = new XQuerySerializer(context.getBroker(), outputProperties, writer);
@@ -95,7 +100,12 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
return new StringValue(this, writer.toString());
} catch (final IOException | SAXException e) {
- throw new XPathException(this, FnModule.SENR0001, e.getMessage());
+ // Preserve specific serialization error codes from the message
+ final String msg = e.getMessage();
+ if (msg != null && msg.startsWith("err:SERE0024")) {
+ throw new XPathException(this, new ErrorCodes.ErrorCode("SERE0024", msg), msg);
+ }
+ throw new XPathException(this, FnModule.SENR0001, msg);
}
}
@@ -130,6 +140,60 @@ private static boolean isSerializationParametersElement(final Item item) {
}
}
+ /**
+ * Check if a serialization boolean parameter value is true.
+ * W3C Serialization 3.1 accepts "yes", "true", "1" (with optional whitespace) as true.
+ */
+ private static boolean isBooleanTrue(final String value) {
+ if (value == null) {
+ return false;
+ }
+ final String trimmed = value.trim();
+ return "yes".equals(trimmed) || "true".equals(trimmed) || "1".equals(trimmed);
+ }
+
+ /**
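+ * Validate serialization parameter consistency per W3C Serialization 3.1 and, when
+ * canonical is true, overwrite the output properties that canonical JSON or XML output requires.
+ * Throws SEPM0009 if omit-xml-declaration=yes is combined with standalone, or with a version other than 1.0 plus doctype-system.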
+ */
+ private void validateSerializationParams(final Properties props) throws XPathException {
+ final String omitXmlDecl = props.getProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+ if (isBooleanTrue(omitXmlDecl)) {
+ // SEPM0009: standalone must be omit (absent) when omit-xml-declaration=yes
+ final String standalone = props.getProperty(OutputKeys.STANDALONE);
+ if (standalone != null) {
+ throw new XPathException(this, ErrorCodes.SEPM0009,
+ "omit-xml-declaration is yes but standalone is set to '" + standalone + "'");
+ }
+ // SEPM0009: version != 1.0 with doctype-system when omit-xml-declaration=yes
+ final String version = props.getProperty(OutputKeys.VERSION);
+ final String doctypeSystem = props.getProperty(OutputKeys.DOCTYPE_SYSTEM);
+ if (version != null && !"1.0".equals(version) && doctypeSystem != null) {
+ throw new XPathException(this, ErrorCodes.SEPM0009,
+ "omit-xml-declaration is yes with version '" + version + "' and doctype-system set");
+ }
+ }
+
+ // Canonical serialization: force required parameters
+ final String canonical = props.getProperty(EXistOutputKeys.CANONICAL);
+ if (isBooleanTrue(canonical)) {
+ final String method = props.getProperty(OutputKeys.METHOD, "xml");
+ if ("json".equals(method)) {
+ // Canonical JSON (RFC 8785): handled in JSONSerializer
+ // Force no indent, no solidus escaping
+ props.setProperty(OutputKeys.INDENT, "no");
+ props.setProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "no");
+ } else {
+ // Canonical XML/XHTML (C14N)
+ props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+ props.setProperty(OutputKeys.ENCODING, "UTF-8");
+ props.remove(OutputKeys.CDATA_SECTION_ELEMENTS);
+ props.setProperty("include-content-type", "no");
+ }
+ }
+ }
+
/**
* Sequence normalization as described in
* XSLT and XQuery Serialization 3.0 - Sequence Normalization .
@@ -173,6 +237,11 @@ public static Sequence normalize(final Expression callingExpr, final XQueryConte
"It is an error if an item in the sequence to serialize is an attribute node or a namespace node.");
}
step2.add(next);
+ } else if (itemType == Type.MAP_ITEM || itemType == Type.FUNCTION) {
+ // Maps and function items cannot be serialized with XML/HTML/XHTML/text methods (SENR0001)
+ throw new XPathException(callingExpr, FnModule.SENR0001,
+ "It is an error if an item in the sequence to serialize is a " +
+ Type.getTypeName(itemType) + ".");
} else {
// atomic value
// "For each item in S1, if the item is atomic, obtain the lexical representation of the item by
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java
index e8f407f3609..accf38f3d44 100644
--- a/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/JSON.java
@@ -27,6 +27,7 @@
import org.exist.Namespaces;
import org.exist.dom.QName;
import org.exist.dom.memtree.MemTreeBuilder;
+import org.exist.xquery.value.BooleanValue;
import org.exist.security.PermissionDeniedException;
import org.exist.source.Source;
import org.exist.source.SourceFactory;
@@ -125,15 +126,30 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
// TODO: jackson does not allow access to raw string, so option "unescape" is not supported
boolean liberal = false;
String handleDuplicates = OPTION_DUPLICATES_USE_LAST;
- if (getArgumentCount() == 2) {
- final MapType options = (MapType)args[1].itemAt(0);
+ if (getArgumentCount() == 2 && !args[1].isEmpty()) {
+ final Item optItem = args[1].itemAt(0);
+ if (optItem.getType() != Type.MAP_ITEM) {
+ throw new XPathException(this, ErrorCodes.XPTY0004,
+ "Expected map for options parameter, got " + Type.getTypeName(optItem.getType()));
+ }
+ final MapType options = (MapType) optItem;
final Sequence liberalOpt = options.get(new StringValue(OPTION_LIBERAL));
if (liberalOpt.hasOne()) {
- liberal = liberalOpt.itemAt(0).convertTo(Type.BOOLEAN).effectiveBooleanValue();
+ final Item liberalItem = liberalOpt.itemAt(0);
+ if (liberalItem.getType() != Type.BOOLEAN) {
+ throw new XPathException(this, ErrorCodes.XPTY0004,
+ "Option 'liberal' must be a boolean, got " + Type.getTypeName(liberalItem.getType()));
+ }
+ liberal = ((BooleanValue) liberalItem).effectiveBooleanValue();
}
final Sequence duplicateOpt = options.get(new StringValue(OPTION_DUPLICATES));
if (duplicateOpt.hasOne()) {
- handleDuplicates = duplicateOpt.itemAt(0).getStringValue();
+ final Item dupItem = duplicateOpt.itemAt(0);
+ if (!Type.subTypeOf(dupItem.getType(), Type.STRING)) {
+ throw new XPathException(this, ErrorCodes.XPTY0004,
+ "Option 'duplicates' must be a string, got " + Type.getTypeName(dupItem.getType()));
+ }
+ handleDuplicates = dupItem.getStringValue();
}
final Sequence escapeOpt = options.get(new StringValue(OPTION_ESCAPE));
if (escapeOpt.hasOne()) {
diff --git a/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java b/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java
index 9649e835344..b097ea4fc82 100644
--- a/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java
+++ b/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java
@@ -126,15 +126,18 @@ public interface ParameterConvention {
public enum W3CParameterConvention implements ParameterConvention {
ALLOW_DUPLICATE_NAMES("allow-duplicate-names", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE),
BYTE_ORDER_MARK("byte-order-mark", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE),
+ CANONICAL(EXistOutputKeys.CANONICAL, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE),
CDATA_SECTION_ELEMENTS(OutputKeys.CDATA_SECTION_ELEMENTS, Type.QNAME, Cardinality.ZERO_OR_MORE, Sequence.EMPTY_SEQUENCE),
DOCTYPE_PUBLIC(OutputKeys.DOCTYPE_PUBLIC, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), //default: () means "absent"
DOCTYPE_SYSTEM(OutputKeys.DOCTYPE_SYSTEM, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), //default: () means "absent"
ENCODING(OutputKeys.ENCODING, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue(UTF_8.name())),
+ ESCAPE_SOLIDUS(EXistOutputKeys.ESCAPE_SOLIDUS, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.TRUE),
ESCAPE_URI_ATTRIBUTES("escape-uri-attributes", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.TRUE),
HTML_VERSION(EXistOutputKeys.HTML_VERSION, Type.DECIMAL, Cardinality.ZERO_OR_ONE, new DecimalValue(5)),
INCLUDE_CONTENT_TYPE("include-content-type", Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.TRUE),
INDENT(OutputKeys.INDENT, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE),
ITEM_SEPARATOR(EXistOutputKeys.ITEM_SEPARATOR, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), //default: () means "absent"
+ JSON_LINES(EXistOutputKeys.JSON_LINES, Type.BOOLEAN, Cardinality.ZERO_OR_ONE, BooleanValue.FALSE),
JSON_NODE_OUTPUT_METHOD(EXistOutputKeys.JSON_NODE_OUTPUT_METHOD, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue("xml")),
MEDIA_TYPE(OutputKeys.MEDIA_TYPE, Type.STRING, Cardinality.ZERO_OR_ONE, Sequence.EMPTY_SEQUENCE), // default: a media type suitable for the chosen method
METHOD(OutputKeys.METHOD, Type.STRING, Cardinality.ZERO_OR_ONE, new StringValue("xml")),
@@ -261,6 +264,15 @@ public static void getSerializationOptions(final Expression parent, final NodeVa
throw new XPathException(parent, FnModule.SENR0001, "serialization parameter elements should be in the output namespace");
}
+ // SEPM0017: reject unrecognized attributes on the serialization-parameters root element
+ for (int i = 0; i < reader.getAttributeCount(); i++) {
+ final String attrNs = reader.getAttributeNamespace(i);
+ if (attrNs == null || attrNs.isEmpty() || Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(attrNs)) {
+ throw new XPathException(ErrorCodes.SEPM0017,
+ "Unrecognized attribute on serialization-parameters: " + reader.getAttributeLocalName(i));
+ }
+ }
+
final int thisLevel = ((NodeId) reader.getProperty(ExtendedXMLStreamReader.PROPERTY_NODE_ID)).getTreeLevel();
while (reader.hasNext()) {
@@ -286,13 +298,27 @@ private static void readStartElement(final Expression parent, final XMLStreamRea
final javax.xml.namespace.QName key = reader.getName();
final String local = key.getLocalPart();
final String prefix = key.getPrefix();
+ final String nsURI = key.getNamespaceURI();
if (properties.containsKey(local)) {
throw new XPathException(parent, FnModule.SEPM0019, "serialization parameter specified twice: " + key);
}
- if (prefix.equals(OUTPUT_NAMESPACE) && !W3CParameterConventionKeys.contains(local)) {
+ if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(nsURI) && !W3CParameterConventionKeys.contains(local)) {
throw new XPathException(ErrorCodes.SEPM0017, "serialization parameter not recognized: " + key);
}
+ // SEPM0017: reject elements with no namespace (must be in output: or exist: namespace)
+ if (nsURI == null || nsURI.isEmpty()) {
+ throw new XPathException(ErrorCodes.SEPM0017,
+ "serialization parameter element must be in a namespace: " + local);
+ }
+
+ // Accept eXist-specific parameters from the exist: namespace (issue #3446)
+ // These include expand-xincludes, highlight-matches, process-xsl-pi, add-exist-id, jsonp, etc.
+ if (Namespaces.EXIST_NS.equals(nsURI)) {
+ readSerializationProperty(reader, local, properties);
+ return;
+ }
+
readSerializationProperty(reader, local, properties);
}
@@ -320,6 +346,10 @@ private static void readSerializationProperty(final XMLStreamReader reader, fina
setCharacterMap(serializationProperties, characterMap);
} else {
String value = reader.getAttributeValue(XMLConstants.NULL_NS_URI, "value");
+ // Normalize whitespace in parameter values per W3C Serialization 3.1
+ if (value != null) {
+ value = value.trim();
+ }
if (value == null) {
if (attributeCount > 0) {
throw new XPathException(ErrorCodes.SEPM0017, MSG_NON_VALUE_ATTRIBUTE + ": " + key);
@@ -413,13 +443,21 @@ public static void setProperty(final String key, final String value, final Prope
qnamesValue.append(' ');
}
- final String[] prefixAndLocal = qnameStr.split(":");
- if (prefixAndLocal.length == 1) {
- qnamesValue.append("{}").append(prefixAndLocal[0]);
- } else if (prefixAndLocal.length == 2) {
- final String prefix = prefixAndLocal[0];
- final String ns = prefixToNs.apply(prefix);
- qnamesValue.append('{').append(ns).append('}').append(prefixAndLocal[1]);
+ // Handle Q{ns}local (URIQualifiedName) — pass through as {ns}local
+ if (qnameStr.startsWith("Q{") && qnameStr.contains("}")) {
+ final int closeBrace = qnameStr.indexOf('}');
+ final String ns = qnameStr.substring(2, closeBrace);
+ final String local = qnameStr.substring(closeBrace + 1);
+ qnamesValue.append('{').append(ns).append('}').append(local);
+ } else {
+ final String[] prefixAndLocal = qnameStr.split(":");
+ if (prefixAndLocal.length == 1) {
+ qnamesValue.append("{}").append(prefixAndLocal[0]);
+ } else if (prefixAndLocal.length == 2) {
+ final String prefix = prefixAndLocal[0];
+ final String ns = prefixToNs.apply(prefix);
+ qnamesValue.append('{').append(ns).append('}').append(prefixAndLocal[1]);
+ }
}
}
@@ -430,7 +468,6 @@ public static void setProperty(final String key, final String value, final Prope
public static Properties getSerializationOptions(final Expression parent, final AbstractMapType entries) throws XPathException {
try {
final Properties properties = new Properties();
-
for (final W3CParameterConvention w3cParameterConvention : W3CParameterConvention.values()) {
final Sequence parameterValue = getParameterValue(parent, entries, w3cParameterConvention,
new StringValue(w3cParameterConvention.getParameterName()));
@@ -520,7 +557,11 @@ private static boolean checkTypes(final ParameterConvention> parameterConventi
final SequenceIterator iterator = sequence.iterate();
while (iterator.hasNext()) {
final Item item = iterator.nextItem();
- if (parameterConvention.getType() != item.getType()) {
+ // Use subtype check: xs:integer is a valid xs:decimal, xs:string subtypes are valid xs:string, etc.
+ // Also accept xs:untypedAtomic — the W3C spec allows untypedAtomic values to be cast
+ // to the required type for serialization parameters
+ if (!Type.subTypeOf(item.getType(), parameterConvention.getType())
+ && item.getType() != Type.UNTYPED_ATOMIC) {
return false;
}
}
@@ -542,11 +583,18 @@ private static void setPropertyForMap(final Properties properties, final Paramet
switch (parameterConvention.getType()) {
case Type.BOOLEAN:
- value = ((BooleanValue) parameterValue.itemAt(0)).getValue() ? "yes" : "no";
+ final Item boolItem = parameterValue.itemAt(0);
+ if (boolItem instanceof BooleanValue bv) {
+ value = bv.getValue() ? "yes" : "no";
+ } else {
+ // xs:untypedAtomic or other — coerce via string
+ final String boolStr = boolItem.getStringValue().trim();
+ value = ("true".equals(boolStr) || "1".equals(boolStr)) ? "yes" : "no";
+ }
properties.setProperty(localParameterName, value);
break;
case Type.STRING:
- value = ((StringValue)parameterValue.itemAt(0)).getStringValue();
+ value = parameterValue.itemAt(0).getStringValue();
properties.setProperty(localParameterName, value);
break;
case Type.DECIMAL:
@@ -554,11 +602,11 @@ private static void setPropertyForMap(final Properties properties, final Paramet
properties.setProperty(localParameterName, value);
break;
case Type.INTEGER:
- value = ((IntegerValue) parameterValue.itemAt(0)).getStringValue();
+ value = parameterValue.itemAt(0).getStringValue();
properties.setProperty(localParameterName, value);
break;
case Type.QNAME:
- if (Cardinality._MANY.isSuperCardinalityOrEqualOf(parameterConvention.getCardinality())) {
+ if (parameterConvention.getCardinality().isSuperCardinalityOrEqualOf(Cardinality._MANY)) {
final SequenceIterator iterator = parameterValue.iterate();
while (iterator.hasNext()) {
final String existingValue = properties.getProperty(localParameterName);
@@ -632,7 +680,7 @@ private static Int2ObjectMap createCharacterMap(final MapType map, final
" must have values of type " + Type.getTypeName(Type.STRING));
}
if (key.getStringValue().length() != 1) {
- throw new XPathException(ErrorCodes.SEPM0017,
+ throw new XPathException(ErrorCodes.SEPM0016,
"Elements of the map for parameter value: " + localParameterName +
" must have keys which are strings composed of a single character");
}
From 68adfbb5989721c338de183628a407b2b54de868 Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:23:15 -0400
Subject: [PATCH 02/11] [feature] Improve XML serialization for W3C compliance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Comprehensive improvements to the core XML serializer (XMLWriter) and
indentation handling (IndentingXMLWriter):
Character escaping:
- Escape CR (U+000D), DEL (U+007F), and LINE SEPARATOR (U+2028)
- Escape C0 control characters (U+0001-U+001F) in XML 1.1 mode
- Fix character reference escaping in CDATA sections
CDATA sections:
- Encoding-aware CDATA split: break on ]]> and on characters not
representable in the output encoding
- Use cdata-section-elements with namespace-aware element matching
- Add shouldUseCdataSections() hook for subclass override
XML declaration and standalone:
- Normalize standalone="omit" to omit the attribute entirely
- Normalize boolean true/false/1/0 to yes/no for standalone
- Emit XML declaration when standalone is explicitly set
Canonical XML (C14N):
- Buffer namespace and attribute events for sorted emission
- Sort namespaces by prefix (default first), attributes by namespace
URI then local name
- Expand empty elements: <a/> becomes <a></a>
- Validate relative namespace URIs (SERE0024)
Normalization form:
- Support NFC, NFD, NFKC, NFKD normalization forms
- Apply normalization during character output
XML 1.1:
- C0 control character escaping (U+0001-U+001F except tab/newline/CR)
Indentation:
- Support suppress-indentation with URI-qualified element names
- Accept boolean true/1 alongside yes for indent parameter
Spec: W3C Serialization 3.1 §5 (XML Output Method),
Canonical XML 1.1 (https://www.w3.org/TR/xml-c14n11/) §2.3,
XML 1.1 §2.2 (Characters)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
exist-core/pom.xml | 9 +
.../util/serializer/IndentingXMLWriter.java | 46 ++-
.../org/exist/util/serializer/XMLWriter.java | 320 ++++++++++++++++--
.../URLRewriteViewPipelineTest.java | 201 +++++++++++
4 files changed, 555 insertions(+), 21 deletions(-)
create mode 100644 exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java
diff --git a/exist-core/pom.xml b/exist-core/pom.xml
index 991c80178de..7c739d20914 100644
--- a/exist-core/pom.xml
+++ b/exist-core/pom.xml
@@ -1200,6 +1200,7 @@ The BaseX Team. The original license statement is also included below.]]>${project.build.testOutputDirectory}/log4j2.xml
+ 180
+
+
+ org.exist.storage.lock.DeadlockIT
+ org.exist.xmldb.RemoveCollectionIT
+
@{jacocoArgLine} --add-modules jdk.incubator.vector --enable-native-access=ALL-UNNAMED -Dfile.encoding=${project.build.sourceEncoding} -Dexist.recovery.progressbar.hide=true
${project.basedir}/../exist-jetty-config/target/classes/org/exist/jetty
diff --git a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
index c336d8b2943..99df54c3e19 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
@@ -25,7 +25,9 @@
import java.io.Writer;
import java.util.ArrayDeque;
import java.util.Deque;
+import java.util.HashSet;
import java.util.Properties;
+import java.util.Set;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
@@ -48,6 +50,8 @@ public class IndentingXMLWriter extends XMLWriter {
private boolean sameline = false;
private boolean whitespacePreserve = false;
private final Deque whitespacePreserveStack = new ArrayDeque<>();
+ private Set suppressIndentation = null;
+ private int suppressIndentDepth = 0;
public IndentingXMLWriter() {
super();
@@ -75,6 +79,9 @@ public void startElement(final String namespaceURI, final String localName, fina
indent();
}
super.startElement(namespaceURI, localName, qname);
+ if (isSuppressIndentation(localName)) {
+ suppressIndentDepth++;
+ }
addIndent();
afterTag = true;
sameline = true;
@@ -86,6 +93,9 @@ public void startElement(final QName qname) throws TransformerException {
indent();
}
super.startElement(qname);
+ if (isSuppressIndentation(qname.getLocalPart())) {
+ suppressIndentDepth++;
+ }
addIndent();
afterTag = true;
sameline = true;
@@ -95,6 +105,9 @@ public void startElement(final QName qname) throws TransformerException {
public void endElement(final String namespaceURI, final String localName, final String qname) throws TransformerException {
endIndent(namespaceURI, localName);
super.endElement(namespaceURI, localName, qname);
+ if (isSuppressIndentation(localName) && suppressIndentDepth > 0) {
+ suppressIndentDepth--;
+ }
popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element
sameline = isInlineTag(namespaceURI, localName);
afterTag = true;
@@ -104,6 +117,9 @@ public void endElement(final String namespaceURI, final String localName, final
public void endElement(final QName qname) throws TransformerException {
endIndent(qname.getNamespaceURI(), qname.getLocalPart());
super.endElement(qname);
+ if (isSuppressIndentation(qname.getLocalPart()) && suppressIndentDepth > 0) {
+ suppressIndentDepth--;
+ }
popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element
sameline = isInlineTag(qname.getNamespaceURI(), qname.getLocalPart());
afterTag = true;
@@ -164,7 +180,29 @@ public void setOutputProperties(final Properties properties) {
} catch (final NumberFormatException e) {
LOG.warn("Invalid indentation value: '{}'", option);
}
- indent = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"));
+ final String indentValue = outputProperties.getProperty(OutputKeys.INDENT, "no").trim();
+ indent = "yes".equals(indentValue) || "true".equals(indentValue) || "1".equals(indentValue);
+ final String suppressProp = outputProperties.getProperty("suppress-indentation");
+ if (suppressProp != null && !suppressProp.isEmpty()) {
+ suppressIndentation = new HashSet<>();
+ for (final String name : suppressProp.split("\\s+")) {
+ if (!name.isEmpty()) {
+ // Handle URI-qualified names: Q{ns}local or {ns}local → extract local part
+ if (name.startsWith("Q{") || name.startsWith("{")) {
+ final int closeBrace = name.indexOf('}');
+ if (closeBrace > 0 && closeBrace < name.length() - 1) {
+ suppressIndentation.add(name.substring(closeBrace + 1));
+ } else {
+ suppressIndentation.add(name);
+ }
+ } else {
+ suppressIndentation.add(name);
+ }
+ }
+ }
+ } else {
+ suppressIndentation = null;
+ }
}
@Override
@@ -220,8 +258,12 @@ protected void addSpaceIfIndent() throws IOException {
writer.write(' ');
}
+ private boolean isSuppressIndentation(final String localName) {
+ return suppressIndentation != null && suppressIndentation.contains(localName);
+ }
+
protected void indent() throws TransformerException {
- if (!indent || whitespacePreserve) {
+ if (!indent || whitespacePreserve || suppressIndentDepth > 0) {
return;
}
final int spaces = indentAmount * level;
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
index 763aaf52ef6..50e618eddb6 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
@@ -86,8 +86,33 @@ public class XMLWriter implements SerializerWriter {
* compared to retrieving resources from the database.
*/
private boolean xdmSerialization = false;
+ private boolean xml11 = false;
+ private boolean canonical = false;
+ @Nullable private java.text.Normalizer.Form normalizationForm = null;
+
+ // Canonical XML: buffer namespaces and attributes for sorting
+ private final List canonicalNamespaces = new ArrayList<>(); // [prefix, uri]
+ private final List canonicalAttributes = new ArrayList<>(); // [nsUri, localName, qname, value]
private final Deque elementName = new ArrayDeque<>();
+
+ /**
+ * Returns true if cdata-section-elements should be applied.
+ * Subclasses (e.g., XHTMLWriter for HTML method) can override
+ * to suppress CDATA sections.
+ */
+ protected boolean shouldUseCdataSections() {
+ return xdmSerialization;
+ }
+
+ /**
+ * Returns the namespace URI of the current (innermost) element,
+ * or null if no element is on the stack.
+ */
+ protected String currentElementNamespaceURI() {
+ final QName top = elementName.peek();
+ return top != null ? top.getNamespaceURI() : null;
+ }
private LazyVal> cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames);
private boolean cdataSetionElement = false;
@@ -96,8 +121,9 @@ public class XMLWriter implements SerializerWriter {
Arrays.fill(textSpecialChars, false);
textSpecialChars['<'] = true;
textSpecialChars['>'] = true;
- // textSpecialChars['\r'] = true;
+ textSpecialChars['\r'] = true;
textSpecialChars['&'] = true;
+ textSpecialChars[0x7F] = true; // DEL (U+007F) must be escaped as a character reference
attrSpecialChars = new boolean[128];
Arrays.fill(attrSpecialChars, false);
@@ -108,6 +134,7 @@ public class XMLWriter implements SerializerWriter {
attrSpecialChars['\t'] = true;
attrSpecialChars['&'] = true;
attrSpecialChars['"'] = true;
+ attrSpecialChars[0x7F] = true; // DEL (U+007F) must be escaped as a character reference
}
@Nullable private XMLDeclaration originalXmlDecl;
@@ -139,6 +166,10 @@ public void setOutputProperties(final Properties properties) {
}
this.xdmSerialization = "yes".equals(outputProperties.getProperty(EXistOutputKeys.XDM_SERIALIZATION, "no"));
+ this.xml11 = "1.1".equals(outputProperties.getProperty(OutputKeys.VERSION));
+ this.normalizationForm = parseNormalizationForm(outputProperties.getProperty("normalization-form", "none"));
+ final String canonicalProp = outputProperties.getProperty(EXistOutputKeys.CANONICAL);
+ this.canonical = "yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp);
}
private Set parseCdataSectionElementNames() {
@@ -291,15 +322,40 @@ public void endElement(final QName qname) throws TransformerException {
}
public void namespace(final String prefix, final String nsURI) throws TransformerException {
- if((nsURI == null) && (prefix == null || prefix.isEmpty())) {
+ if((nsURI == null || nsURI.isEmpty()) && (prefix == null || prefix.isEmpty())) {
+ return;
+ }
+
+ // The xml namespace is implicitly declared and never needs explicit serialization
+ if ("xml".equals(prefix)) {
return;
}
- try {
+ try {
if(!tagIsOpen) {
throw new TransformerException("Found a namespace declaration outside an element");
}
+ if (canonical) {
+ // Buffer for sorting — emitted in closeStartTag
+ final String pfx = prefix != null ? prefix : "";
+ final String uri = nsURI != null ? nsURI : "";
+ // Validate: reject relative namespace URIs (SERE0024)
+ if (!uri.isEmpty() && isRelativeUri(uri)) {
+ throw new TransformerException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + uri);
+ }
+ if (pfx.isEmpty() && uri.isEmpty()) {
+ return; // Skip xmlns="" in canonical (not meaningful for no-namespace elements)
+ }
+ // Deduplicate: replace existing binding for same prefix
+ canonicalNamespaces.removeIf(ns -> ns[0].equals(pfx));
+ canonicalNamespaces.add(new String[]{pfx, uri});
+ if (pfx.isEmpty()) {
+ defaultNamespace = uri;
+ }
+ return;
+ }
+
if(prefix != null && !prefix.isEmpty()) {
writer.write(' ');
writer.write("xmlns");
@@ -310,7 +366,7 @@ public void namespace(final String prefix, final String nsURI) throws Transforme
writer.write('"');
} else {
if(defaultNamespace.equals(nsURI)) {
- return;
+ return;
}
writer.write(' ');
writer.write("xmlns");
@@ -329,8 +385,13 @@ public void attribute(String qname, CharSequence value) throws TransformerExcept
if(!tagIsOpen) {
characters(value);
return;
- // throw new TransformerException("Found an attribute outside an
- // element");
+ }
+ if (canonical) {
+ // Buffer for sorting — extract namespace URI from qname if prefixed
+ final int colon = qname.indexOf(':');
+ final String nsUri = colon > 0 ? "" : ""; // string qname doesn't carry namespace
+ canonicalAttributes.add(new String[]{nsUri, colon > 0 ? qname.substring(colon + 1) : qname, qname, value.toString()});
+ return;
}
writer.write(' ');
writer.write(qname);
@@ -347,8 +408,18 @@ public void attribute(final QName qname, final CharSequence value) throws Transf
if(!tagIsOpen) {
characters(value);
return;
- // throw new TransformerException("Found an attribute outside an
- // element");
+ }
+ if (canonical) {
+ final String nsUri = qname.getNamespaceURI() != null ? qname.getNamespaceURI() : "";
+ final String localName = qname.getLocalPart();
+ final String fullName;
+ if (qname.getPrefix() != null && !qname.getPrefix().isEmpty()) {
+ fullName = qname.getPrefix() + ":" + localName;
+ } else {
+ fullName = localName;
+ }
+ canonicalAttributes.add(new String[]{nsUri, localName, fullName, value.toString()});
+ return;
}
writer.write(' ');
if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) {
@@ -373,12 +444,68 @@ public void characters(final CharSequence chars) throws TransformerException {
if(tagIsOpen) {
closeStartTag(false);
}
- writeChars(chars, false);
+ // When xdmSerialization is active and current element is in cdata-section-elements,
+ // wrap text content in CDATA instead of escaping it (per W3C Serialization 3.1)
+ if (shouldUseCdataSections() && !elementName.isEmpty()
+ && cdataSectionElements.get().contains(elementName.peek())) {
+ writeCdataContent(chars);
+ } else {
+ writeChars(chars, false);
+ }
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
}
+ private void writeCdataContent(final CharSequence chars) throws IOException {
+ // CDATA sections must be split when:
+ // 1. The content contains "]]>" (which would end the CDATA prematurely)
+ // 2. A character cannot be represented in the output encoding (must be escaped as &#xNN;)
+ final String s = normalize(chars).toString();
+ boolean inCdata = false;
+ for (int i = 0; i < s.length(); ) {
+ final int cp = s.codePointAt(i);
+ final int cpLen = Character.charCount(cp);
+
+ // Check for "]]>" sequence
+ if (cp == ']' && i + 2 < s.length() && s.charAt(i + 1) == ']' && s.charAt(i + 2) == '>') {
+ if (!inCdata) {
+ writer.write("");
+ inCdata = false;
+ i += 2; // skip "]]", the ">" will be picked up next
+ continue;
+ }
+
+ // Check if character is encodable in the output charset
+ if (!charSet.inCharacterSet((char) cp)) {
+ // Close any open CDATA section
+ if (inCdata) {
+ writer.write("]]>");
+ inCdata = false;
+ }
+ // Write as character reference
+ writer.write("");
+ writer.write(Integer.toHexString(cp));
+ writer.write(';');
+ } else {
+ // Encodable character — write inside CDATA
+ if (!inCdata) {
+ writer.write("");
+ }
+ }
+
public void characters(final char[] ch, final int start, final int len) throws TransformerException {
if(!declarationWritten) {
writeDeclaration();
@@ -510,8 +637,23 @@ public void documentType(final String name, final String publicId, final String
protected void closeStartTag(final boolean isEmpty) throws TransformerException {
try {
if(tagIsOpen) {
- if(isEmpty) {
+ if (canonical) {
+ flushCanonicalBuffers();
+ }
+ if(isEmpty && !canonical) {
+ // Canonical XML: empty elements expanded to
writer.write("/>");
+ } else if (isEmpty) {
+ // Canonical: write > for empty elements
+ writer.write('>');
+ final QName currentElem = elementName.peek();
+ writer.write("");
+ if (currentElem.getPrefix() != null && !currentElem.getPrefix().isEmpty()) {
+ writer.write(currentElem.getPrefix());
+ writer.write(':');
+ }
+ writer.write(currentElem.getLocalPart());
+ writer.write('>');
} else {
writer.write('>');
}
@@ -522,6 +664,52 @@ protected void closeStartTag(final boolean isEmpty) throws TransformerException
}
}
+ protected boolean isCanonical() {
+ return canonical;
+ }
+
+ protected void flushCanonicalBuffersXhtml() throws TransformerException {
+ try {
+ flushCanonicalBuffers();
+ } catch (final IOException ioe) {
+ throw new TransformerException(ioe.getMessage(), ioe);
+ }
+ }
+
+ private void flushCanonicalBuffers() throws IOException {
+ // Sort namespaces by prefix (default namespace first, then alphabetical)
+ canonicalNamespaces.sort((a, b) -> a[0].compareTo(b[0]));
+ // Write sorted namespaces
+ for (final String[] ns : canonicalNamespaces) {
+ writer.write(' ');
+ if (ns[0].isEmpty()) {
+ writer.write("xmlns=\"");
+ } else {
+ writer.write("xmlns:");
+ writer.write(ns[0]);
+ writer.write("=\"");
+ }
+ writeChars(ns[1], true);
+ writer.write('"');
+ }
+ canonicalNamespaces.clear();
+
+ // Sort attributes by namespace URI (primary), then local name (secondary)
+ canonicalAttributes.sort((a, b) -> {
+ final int cmp = a[0].compareTo(b[0]);
+ return cmp != 0 ? cmp : a[1].compareTo(b[1]);
+ });
+ // Write sorted attributes
+ for (final String[] attr : canonicalAttributes) {
+ writer.write(' ');
+ writer.write(attr[2]); // qualified name
+ writer.write("=\"");
+ writeChars(attr[3], true);
+ writer.write('"');
+ }
+ canonicalAttributes.clear();
+ }
+
protected void writeDeclaration() throws TransformerException {
if(declarationWritten) {
return;
@@ -537,7 +725,9 @@ protected void writeDeclaration() throws TransformerException {
// get the fields of the persisted xml declaration, but overridden with any properties from the serialization properties
final String version = outputProperties.getProperty(OutputKeys.VERSION, (originalXmlDecl.version != null ? originalXmlDecl.version : DEFAULT_XML_VERSION));
final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, (originalXmlDecl.encoding != null ? originalXmlDecl.encoding : DEFAULT_XML_ENCODING));
- @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone);
+ @Nullable final String standaloneOrig = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone);
+ // "omit" means standalone should be absent from the declaration
+ @Nullable final String standalone = (standaloneOrig != null && "omit".equalsIgnoreCase(standaloneOrig.trim())) ? null : standaloneOrig;
writeDeclaration(version, encoding, standalone);
@@ -545,11 +735,15 @@ protected void writeDeclaration() throws TransformerException {
}
final String omitXmlDecl = outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
- if ("no".equals(omitXmlDecl)) {
+ @Nullable final String standaloneRaw = outputProperties.getProperty(OutputKeys.STANDALONE);
+ // "omit" means standalone should be absent from the declaration
+ @Nullable final String standalone = (standaloneRaw != null && "omit".equalsIgnoreCase(standaloneRaw.trim())) ? null : standaloneRaw;
+ // Per W3C Serialization 3.1: output declaration if omit-xml-declaration is false/no/0,
+ // or if standalone is explicitly set (the declaration is required to carry standalone)
+ if (isBooleanFalse(omitXmlDecl) || standalone != null) {
// get the fields of the declaration from the serialization properties
final String version = outputProperties.getProperty(OutputKeys.VERSION, DEFAULT_XML_VERSION);
final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, DEFAULT_XML_ENCODING);
- @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE);
writeDeclaration(version, encoding, standalone);
}
@@ -564,7 +758,15 @@ private void writeDeclaration(final String version, final String encoding, @Null
writer.write('"');
if(standalone != null) {
writer.write(" standalone=\"");
- writer.write(standalone);
+ // Normalize boolean values to yes/no for XML declaration
+ final String standaloneVal = standalone.trim();
+ if ("true".equals(standaloneVal) || "1".equals(standaloneVal)) {
+ writer.write("yes");
+ } else if ("false".equals(standaloneVal) || "0".equals(standaloneVal)) {
+ writer.write("no");
+ } else {
+ writer.write(standaloneVal);
+ }
writer.write('"');
}
writer.write("?>\n");
@@ -589,36 +791,79 @@ protected void writeDoctype(final String rootElement) throws TransformerExceptio
protected boolean needsEscape(final char ch) {
return true;
}
+
+ /**
+ * Whether & before { should be escaped. HTML output returns false
+ * per W3C HTML serialization spec. XML output returns true (always escape &).
+ */
+ protected boolean escapeAmpersandBeforeBrace() {
+ return true;
+ }
+
+ /**
+ * Check if a serialization boolean parameter value is false.
+ * W3C Serialization 3.1 accepts "no", "false", "0" (with optional whitespace) as false.
+ */
+ protected static boolean isBooleanFalse(final String value) {
+ if (value == null) {
+ return false;
+ }
+ final String trimmed = value.trim();
+ return "no".equals(trimmed) || "false".equals(trimmed) || "0".equals(trimmed);
+ }
+
+ /**
+ * Whether the given character needs escaping. Subclasses can override
+ * to suppress escaping for specific contexts (e.g., HTML raw text elements).
+ *
+ * @param ch the character to check
+ * @param inAttribute true if we're writing an attribute value
+ */
+ protected boolean needsEscape(final char ch, final boolean inAttribute) {
+ return needsEscape(ch);
+ }
protected void writeChars(final CharSequence s, final boolean inAttribute) throws IOException {
+ // Apply Unicode normalization if configured
+ final CharSequence text = normalize(s);
final boolean[] specialChars = inAttribute ? attrSpecialChars : textSpecialChars;
char ch = 0;
- final int len = s.length();
+ final int len = text.length();
int pos = 0, i;
while(pos < len) {
i = pos;
while(i < len) {
- ch = s.charAt(i);
+ ch = text.charAt(i);
if(ch < 128) {
if(specialChars[ch]) {
break;
+ } else if(xml11 && ch >= 0x01 && ch <= 0x1F
+ && ch != 0x09 && ch != 0x0A && ch != 0x0D) {
+ // XML 1.1: C0 control chars (except TAB, LF, CR) must be escaped
+ break;
} else {
i++;
}
} else if(!charSet.inCharacterSet(ch)) {
break;
+ } else if(ch >= 0x7F && ch <= 0x9F) {
+ // Control chars 0x7F-0x9F must be serialized as character references
+ break;
+ } else if(ch == 0x2028) {
+ // LINE SEPARATOR must be serialized as character reference
+ break;
} else {
i++;
}
}
- writeCharSeq(s, pos, i);
+ writeCharSeq(text, pos, i);
// writer.write(s.subSequence(pos, i).toString());
if (i >= len) {
return;
}
- if(needsEscape(ch)) {
+ if(needsEscape(ch, inAttribute)) {
switch(ch) {
case '<':
writer.write("<");
@@ -627,7 +872,12 @@ protected void writeChars(final CharSequence s, final boolean inAttribute) throw
writer.write(">");
break;
case '&':
- writer.write("&");
+ // HTML spec: & before { in attribute values should not be escaped
+ if (inAttribute && i + 1 < len && text.charAt(i + 1) == '{' && !escapeAmpersandBeforeBrace()) {
+ writer.write('&');
+ } else {
+ writer.write("&");
+ }
break;
case '\r':
writer.write("
");
@@ -672,6 +922,38 @@ protected void writeCharacterReference(final char charval) throws IOException {
writer.write(charref, 0, o);
}
+ @Nullable
+ private static java.text.Normalizer.Form parseNormalizationForm(final String value) {
+ if (value == null) return null;
+ return switch (value.trim().toUpperCase(java.util.Locale.ROOT)) {
+ case "NFC" -> java.text.Normalizer.Form.NFC;
+ case "NFD" -> java.text.Normalizer.Form.NFD;
+ case "NFKC" -> java.text.Normalizer.Form.NFKC;
+ case "NFKD" -> java.text.Normalizer.Form.NFKD;
+ case "NONE", "" -> null;
+ default -> null; // "fully-normalized" or unknown — treated as none
+ };
+ }
+
+ /**
+ * Apply Unicode normalization if a normalization-form is set.
+ */
+ protected CharSequence normalize(final CharSequence text) {
+ if (normalizationForm == null) return text;
+ final String s = text.toString();
+ if (java.text.Normalizer.isNormalized(s, normalizationForm)) return text;
+ return java.text.Normalizer.normalize(s, normalizationForm);
+ }
+
+ private static boolean isRelativeUri(final String uri) {
+ for (int i = 0; i < uri.length(); i++) {
+ final char c = uri.charAt(i);
+ if (c == ':') return false;
+ if (c == '/' || c == '?' || c == '#') return true;
+ }
+ return true;
+ }
+
private static class XMLDeclaration {
@Nullable final String version;
@Nullable final String encoding;
diff --git a/exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java b/exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java
new file mode 100644
index 00000000000..df073b8f1bf
--- /dev/null
+++ b/exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java
@@ -0,0 +1,201 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.http.urlrewrite;
+
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.client.fluent.Request;
+import org.apache.http.entity.ContentType;
+import org.exist.test.ExistWebServer;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests the URL rewrite view pipeline — specifically the case where a stored
+ * HTML document (text/html) is forwarded through a view.xq that processes it
+ * via request:get-data().
+ *
+ * This test was written to catch a regression where:
+ * 1. RESTServer forces method=xhtml for text/html documents
+ * 2. The XHTML serialization produces non-self-closing meta tags
+ * 3. The view's request:get-data() fails to parse the invalid XML
+ * 4. The view receives a string instead of XML nodes, causing XPTY0019
+ *
+ * @see URL rewrite view pipeline regression
+ */
+public class URLRewriteViewPipelineTest {
+
+ @ClassRule
+ public static final ExistWebServer existWebServer = new ExistWebServer(true, false, true, true);
+
+ private static final String TEST_COLLECTION = "/db/apps/test-url-rewrite";
+
+ private static final String CONTROLLER_XQ =
+ "xquery version \"3.1\";\n" +
+ "declare variable $exist:path external;\n" +
+ "declare variable $exist:resource external;\n" +
+ "declare variable $exist:controller external;\n" +
+ "declare variable $exist:prefix external;\n" +
+ "\n" +
+ "if (ends-with($exist:resource, '.html')) then\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ "else\n" +
+ " \n" +
+ " \n" +
+ " ";
+
+ private static final String VIEW_XQ =
+ "xquery version \"3.1\";\n" +
+ "declare namespace output=\"http://www.w3.org/2010/xslt-xquery-serialization\";\n" +
+ "declare option output:method \"html\";\n" +
+ "declare option output:media-type \"text/html\";\n" +
+ "\n" +
+ "let $html := request:get-data()\n" +
+ "return\n" +
+ " \n" +
+ " \n" +
+ " View Pipeline Test \n" +
+ " { $html/html/head/* }\n" +
+ " \n" +
+ " { $html/html/body }\n" +
+ " ";
+
+ private static final String HTML_WITH_HEAD =
+ "\n" +
+ " \n" +
+ " Test Page \n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " Hello World \n" +
+ " \n" +
+ "";
+
+ private static final String HTML_WITHOUT_HEAD =
+ "\n" +
+ " \n" +
+ " Hello World \n" +
+ " \n" +
+ "";
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ // Store test files via REST API (admin user)
+ final String restUrl = "http://localhost:" + existWebServer.getPort() + "/rest" + TEST_COLLECTION;
+
+ // Create collection and store files via HTTP PUT
+ storeViaRest(restUrl + "/controller.xq", CONTROLLER_XQ, "application/xquery");
+ storeViaRest(restUrl + "/view.xq", VIEW_XQ, "application/xquery");
+ storeViaRest(restUrl + "/with-head.html", HTML_WITH_HEAD, "text/html");
+ storeViaRest(restUrl + "/no-head.html", HTML_WITHOUT_HEAD, "text/html");
+
+ // Set execute permissions on XQuery files
+ final String chmod = "sm:chmod(xs:anyURI('" + TEST_COLLECTION + "/controller.xq'), 'rwxr-xr-x')," +
+ "sm:chmod(xs:anyURI('" + TEST_COLLECTION + "/view.xq'), 'rwxr-xr-x')";
+ Request.Get("http://localhost:" + existWebServer.getPort() + "/rest/db?_query=" +
+ java.net.URLEncoder.encode(chmod, "UTF-8") + "&_wrap=no")
+ .addHeader("Authorization", "Basic " + java.util.Base64.getEncoder().encodeToString("admin:".getBytes()))
+ .execute();
+ }
+
+ @AfterClass
+ public static void teardown() throws Exception {
+ // Remove test collection via REST
+ Request.Delete("http://localhost:" + existWebServer.getPort() + "/rest" + TEST_COLLECTION)
+ .addHeader("Authorization", "Basic " + java.util.Base64.getEncoder().encodeToString("admin:".getBytes()))
+ .execute();
+ }
+
+ /**
+ * Tests that an HTML document WITH a head element can be served through
+ * the URL rewrite view pipeline. This is the regression case — the view
+ * must receive the document as XML nodes, not as a string.
+ */
+ @Test
+ public void htmlWithHeadThroughViewPipeline() throws IOException {
+ final String url = "http://localhost:" + existWebServer.getPort()
+ + "/test-url-rewrite/with-head.html";
+
+ final HttpResponse response = Request.Get(url).execute().returnResponse();
+ final int status = response.getStatusLine().getStatusCode();
+ final String body = new String(
+ response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8);
+
+ // Should return 200, not 400 (namespace error) or 500 (XPTY0019)
+ assertEquals("Expected 200 OK but got " + status + ": " + body.substring(0, Math.min(200, body.length())),
+ HttpStatus.SC_OK, status);
+
+ // The response should contain the original title from the source HTML
+ assertTrue("Response should contain the source page's title",
+ body.contains("Test Page"));
+
+ // The response should contain the view's wrapper title
+ assertTrue("Response should contain the view's title",
+ body.contains("View Pipeline Test"));
+
+ // The response should contain the body content
+ assertTrue("Response should contain body content",
+ body.contains("Hello World"));
+
+ // The response should NOT contain raw XML entities (indicating string was returned)
+ assertFalse("Response should not contain escaped XML (string instead of nodes)",
+ body.contains("<html"));
+ }
+
+ /**
+ * Tests that an HTML document WITHOUT a head element works (baseline).
+ */
+ @Test
+ public void htmlWithoutHeadThroughViewPipeline() throws IOException {
+ final String url = "http://localhost:" + existWebServer.getPort()
+ + "/test-url-rewrite/no-head.html";
+
+ final HttpResponse response = Request.Get(url).execute().returnResponse();
+ final int status = response.getStatusLine().getStatusCode();
+
+ assertEquals(HttpStatus.SC_OK, status);
+
+ final String body = new String(
+ response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8);
+ assertTrue("Response should contain body content",
+ body.contains("Hello World"));
+ }
+
+ private static void storeViaRest(final String url, final String content, final String contentType)
+ throws IOException {
+ Request.Put(url)
+ .addHeader("Authorization", "Basic " + java.util.Base64.getEncoder().encodeToString("admin:".getBytes()))
+ .bodyString(content, ContentType.create(contentType, StandardCharsets.UTF_8))
+ .execute();
+ }
+}
From 437923d8a747ffb6959ac6bec477cc3ba4ea809a Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:23:28 -0400
Subject: [PATCH 03/11] [feature] Improve XHTML serialization for W3C
compliance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Major improvements to XHTMLWriter for correct HTML/XHTML output:
Content-type meta injection:
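- Write <meta charset="..."> or <meta http-equiv="Content-Type" content="...">
as first child of <head> when include-content-type=yes (default)
- HTML5 uses the <meta charset="..."> shorthand
- XHTML uses self-closing <meta ... /> for valid XML output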
- Track head element state, reset between serializations
HTML method support:
- Boolean attribute minimization (checked, disabled, selected, etc.)
- Raw text elements (script, style) — no escaping in element content
- Suppress cdata-section-elements for HTML method
- Don't escape & before { in HTML attribute values (template syntax)
- Add embed to void/empty elements list
SVG/MathML namespace normalization:
- Collapse SVG and MathML namespace prefixes to default namespace
in XHTML5 serialization (e.g., svg:rect → rect within SVG)
Canonical XML support in XHTML close tag.
HTML version detection: default from 1.0 to 5.0.
Spec: W3C Serialization 3.1 §7 (XHTML Output Method),
W3C Serialization 3.1 §8 (HTML Output Method)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../util/serializer/AbstractSerializer.java | 26 +-
.../exist/util/serializer/XHTMLWriter.java | 284 ++++++++++++++++--
2 files changed, 279 insertions(+), 31 deletions(-)
diff --git a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
index 758ccee130a..a1b7c9890b3 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
@@ -81,13 +81,27 @@ protected SerializerWriter getDefaultWriter() {
public void setOutput(Writer writer, Properties properties) {
outputProperties = Objects.requireNonNullElseGet(properties, () -> new Properties(defaultProperties));
final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml");
- final String htmlVersionProp = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION, "1.0");
-
+ // For html/xhtml methods, determine HTML version:
+ // 1. Use html-version if explicitly set
+ // 2. Otherwise use version (W3C spec: version controls HTML version for html method)
+ // 3. Default to 5.0
double htmlVersion;
- try {
- htmlVersion = Double.parseDouble(htmlVersionProp);
- } catch (NumberFormatException e) {
- htmlVersion = 1.0;
+ final String explicitHtmlVersion = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION);
+ if (explicitHtmlVersion != null) {
+ try {
+ htmlVersion = Double.parseDouble(explicitHtmlVersion);
+ } catch (NumberFormatException e) {
+ htmlVersion = 5.0;
+ }
+ } else if (("html".equalsIgnoreCase(method) || "xhtml".equalsIgnoreCase(method))
+ && outputProperties.getProperty(OutputKeys.VERSION) != null) {
+ try {
+ htmlVersion = Double.parseDouble(outputProperties.getProperty(OutputKeys.VERSION));
+ } catch (NumberFormatException e) {
+ htmlVersion = 5.0;
+ }
+ } else {
+ htmlVersion = 5.0;
}
final SerializerWriter baseSerializerWriter = getBaseSerializerWriter(method, htmlVersion);
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
index b0006f7f51c..9238cd1e848 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
@@ -23,6 +23,7 @@
import java.io.IOException;
import java.io.Writer;
+import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
@@ -36,12 +37,35 @@
*/
public class XHTMLWriter extends IndentingXMLWriter {
+ /**
+ * HTML boolean attributes per HTML 4.01 and HTML5 spec.
+ * When method="html" and the attribute value equals the attribute name
+ * (case-insensitive), the attribute is minimized to just the name.
+ */
+ protected static final ObjectSet BOOLEAN_ATTRIBUTES = new ObjectOpenHashSet<>(31);
+ static {
+ BOOLEAN_ATTRIBUTES.add("checked");
+ BOOLEAN_ATTRIBUTES.add("compact");
+ BOOLEAN_ATTRIBUTES.add("declare");
+ BOOLEAN_ATTRIBUTES.add("defer");
+ BOOLEAN_ATTRIBUTES.add("disabled");
+ BOOLEAN_ATTRIBUTES.add("ismap");
+ BOOLEAN_ATTRIBUTES.add("multiple");
+ BOOLEAN_ATTRIBUTES.add("nohref");
+ BOOLEAN_ATTRIBUTES.add("noresize");
+ BOOLEAN_ATTRIBUTES.add("noshade");
+ BOOLEAN_ATTRIBUTES.add("nowrap");
+ BOOLEAN_ATTRIBUTES.add("readonly");
+ BOOLEAN_ATTRIBUTES.add("selected");
+ }
+
protected static final ObjectSet EMPTY_TAGS = new ObjectOpenHashSet<>(31);
static {
EMPTY_TAGS.add("area");
EMPTY_TAGS.add("base");
EMPTY_TAGS.add("br");
EMPTY_TAGS.add("col");
+ EMPTY_TAGS.add("embed");
EMPTY_TAGS.add("hr");
EMPTY_TAGS.add("img");
EMPTY_TAGS.add("input");
@@ -88,6 +112,8 @@ public class XHTMLWriter extends IndentingXMLWriter {
}
protected String currentTag;
+ protected boolean inHead = false;
+ protected boolean contentTypeMetaWritten = false;
protected final ObjectSet emptyTags;
protected final ObjectSet inlineTags;
@@ -120,78 +146,121 @@ public XHTMLWriter(final Writer writer, ObjectSet emptyTags, ObjectSet 0 && namespaceURI != null && namespaceURI.equals(Namespaces.XHTML_NS)) {
- haveCollapsedXhtmlPrefix = true;
- return qname.substring(pos+1);
-
+ if (pos > 0 && namespaceURI != null) {
+ if (namespaceURI.equals(Namespaces.XHTML_NS)) {
+ haveCollapsedXhtmlPrefix = true;
+ return qname.substring(pos + 1);
+ }
+ // XHTML5: normalize SVG and MathML prefixes
+ if (isHtml5Version() && (namespaceURI.equals(SVG_NS) || namespaceURI.equals(MATHML_NS))) {
+ collapsedForeignNs = namespaceURI;
+ return qname.substring(pos + 1);
+ }
}
-
return qname;
}
@Override
public void namespace(final String prefix, final String nsURI) throws TransformerException {
- if(haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) {
- return; //dont output the xmlns:prefix for the collapsed nodes prefix
+ if (haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) {
+ return; // don't output the xmlns:prefix for the collapsed node's prefix
+ }
+ // When a foreign namespace prefix was collapsed, replace the prefixed
+ // declaration with a default namespace declaration
+ if (collapsedForeignNs != null && prefix != null && !prefix.isEmpty()
+ && nsURI.equals(collapsedForeignNs)) {
+ super.namespace("", nsURI); // emit xmlns="..." instead of xmlns:prefix="..."
+ return;
}
-
super.namespace(prefix, nsURI);
}
@@ -200,9 +269,25 @@ public void namespace(final String prefix, final String nsURI) throws Transforme
protected void closeStartTag(final boolean isEmpty) throws TransformerException {
try {
if (tagIsOpen) {
+ // Flush canonical buffers (sorted namespaces + attributes) if active
+ if (isCanonical()) {
+ flushCanonicalBuffersXhtml();
+ }
if (isEmpty) {
- if (isEmptyTag(currentTag)) {
- getWriter().write(" />");
+ if (isCanonical()) {
+ // Canonical: always expand empty elements
+ getWriter().write('>');
+ getWriter().write("");
+ getWriter().write(currentTag);
+ getWriter().write('>');
+ } else if (isEmptyTag(currentTag)) {
+ // For method="html", use HTML-style void tags (<br>)
+ // For method="xhtml", use XHTML-style self-closing tags (<br />)
+ if (isHtmlMethod()) {
+ getWriter().write(">");
+ } else {
+ getWriter().write(" />");
+ }
} else {
getWriter().write('>');
getWriter().write("");
@@ -218,10 +303,159 @@ protected void closeStartTag(final boolean isEmpty) throws TransformerException
throw new TransformerException(ioe.getMessage(), ioe);
}
}
+
+ /**
+ * Returns true if the output method is "html" (not "xhtml").
+ * HTML uses void element syntax ({@code <br>}) while XHTML uses self-closing ({@code <br />}).
+ */
+ private boolean isHtmlMethod() {
+ if (outputProperties != null) {
+ final String method = outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD);
+ return "html".equalsIgnoreCase(method);
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if the HTML version is 5.0 or higher.
+ */
+ private boolean isHtml5Version() {
+ if (outputProperties == null) {
+ return true; // default to HTML5
+ }
+ final String version = outputProperties.getProperty(OutputKeys.VERSION);
+ if (version != null) {
+ try {
+ return Double.parseDouble(version) >= 5.0;
+ } catch (final NumberFormatException e) {
+ // ignore
+ }
+ }
+ return true; // default to HTML5
+ }
+ @Override
+ public void attribute(final QName qname, final CharSequence value) throws TransformerException {
+ // For method="html", minimize boolean attributes when value matches name
+ if (isHtmlMethod() && isBooleanAttribute(qname.getLocalPart(), value)) {
+ try {
+ if (!tagIsOpen) {
+ characters(value);
+ return;
+ }
+ final Writer w = getWriter();
+ w.write(' ');
+ w.write(qname.getLocalPart());
+ // Don't write ="value" — minimized form
+ } catch (final IOException ioe) {
+ throw new TransformerException(ioe.getMessage(), ioe);
+ }
+ return;
+ }
+ super.attribute(qname, value);
+ }
+
+ @Override
+ public void attribute(final String qname, final CharSequence value) throws TransformerException {
+ if (isHtmlMethod() && isBooleanAttribute(qname, value)) {
+ try {
+ if (!tagIsOpen) {
+ characters(value);
+ return;
+ }
+ final Writer w = getWriter();
+ w.write(' ');
+ w.write(qname);
+ } catch (final IOException ioe) {
+ throw new TransformerException(ioe.getMessage(), ioe);
+ }
+ return;
+ }
+ super.attribute(qname, value);
+ }
+
+ private boolean isBooleanAttribute(final String attrName, final CharSequence value) {
+ return BOOLEAN_ATTRIBUTES.contains(attrName.toLowerCase(java.util.Locale.ROOT))
+ && attrName.equalsIgnoreCase(value.toString());
+ }
+
+ private static final ObjectSet RAW_TEXT_ELEMENTS_HTML = new ObjectOpenHashSet<>(4);
+ static {
+ RAW_TEXT_ELEMENTS_HTML.add("script");
+ RAW_TEXT_ELEMENTS_HTML.add("style");
+ }
+
+ @Override
+ protected boolean needsEscape(final char ch, final boolean inAttribute) {
+ // For HTML method, script and style content should not be escaped
+ if (!inAttribute && isHtmlMethod()
+ && currentTag != null && RAW_TEXT_ELEMENTS_HTML.contains(currentTag.toLowerCase(java.util.Locale.ROOT))) {
+ return false;
+ }
+ return super.needsEscape(ch, inAttribute);
+ }
+
+ /**
+ * For HTML serialization, cdata-section-elements is ignored per the
+ * W3C serialization spec — CDATA sections are not valid in HTML.
+ */
+ @Override
+ protected boolean shouldUseCdataSections() {
+ if (isHtmlMethod()) {
+ return false;
+ }
+ return super.shouldUseCdataSections();
+ }
+
+ @Override
+ protected boolean escapeAmpersandBeforeBrace() {
+ // HTML spec: & before { in attribute values should not be escaped
+ return false;
+ }
+
@Override
protected boolean isInlineTag(final String namespaceURI, final String localName) {
return (namespaceURI == null || namespaceURI.isEmpty() || Namespaces.XHTML_NS.equals(namespaceURI))
&& inlineTags.contains(localName);
}
+
+ /**
+ * Write a meta content-type tag as the first child of head when
+ * include-content-type is enabled (the default per W3C Serialization 3.1).
+ */
+ protected void writeContentTypeMeta() throws TransformerException {
+ if (contentTypeMetaWritten || outputProperties == null) {
+ return;
+ }
+ final String includeContentType = outputProperties.getProperty("include-content-type", "yes");
+ if (!"yes".equals(includeContentType)) {
+ return;
+ }
+ contentTypeMetaWritten = true;
+ try {
+ final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8");
+ closeStartTag(false);
+ final Writer writer = getWriter();
+
+ // HTML5 method uses <meta charset="...">
+ // XHTML and HTML4 use <meta http-equiv="Content-Type" content="...; charset=...">
+ // XHTML mode requires self-closing tags (/>) for valid XML output —
+ // the URL rewrite pipeline re-parses this as XML in the view step.
+ final boolean selfClose = !isHtmlMethod();
+ if (isHtmlMethod() && isHtml5Version()) {
+ writer.write(" " : "\">");
+ } else {
+ final String mediaType = outputProperties.getProperty(OutputKeys.MEDIA_TYPE, "text/html");
+ writer.write(" " : "\">");
+ }
+ } catch (IOException e) {
+ throw new TransformerException(e.getMessage(), e);
+ }
+ }
}
From 20ee93729392e961d2eee1691787caba484b5f24 Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:23:38 -0400
Subject: [PATCH 04/11] [feature] Fix HTML5/XHTML5 fragment and DOCTYPE
serialization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
XHTML5Writer:
- Suppress DOCTYPE for non-<html> root elements (fragment serialization)
- Support doctype-public and doctype-system for XHTML mode
- Suppress DOCTYPE entirely in canonical mode
HTML5Writer:
- Processing instructions use > not ?> for HTML method
- Override needsEscape(char, boolean) for raw text elements
Test: HTML5FragmentTest — 12 new tests for fragment DOCTYPE suppression,
suppress-indentation, CDATA suppression in HTML, script escaping.
Spec: W3C Serialization 3.1 §7.3 (XHTML DOCTYPE),
HTML5 §12.1.3 (Serialization of script/style)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../exist/util/serializer/HTML5Writer.java | 33 +++
.../exist/util/serializer/XHTML5Writer.java | 41 +++-
.../util/serializer/HTML5FragmentTest.java | 220 ++++++++++++++++++
.../util/serializer/HTML5WriterTest.java | 8 +-
.../src/test/xquery/xquery3/serialize.xql | 64 ++++-
5 files changed, 357 insertions(+), 9 deletions(-)
create mode 100644 exist-core/src/test/java/org/exist/util/serializer/HTML5FragmentTest.java
diff --git a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
index 1dffc3029b7..bc69c4304c6 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
@@ -246,6 +246,23 @@ protected void closeStartTag(boolean isEmpty) throws TransformerException {
}
}
+    /**
+     * Writes a processing instruction using HTML syntax, i.e.
+     * {@code <?target data>}: the instruction is opened with {@code <?} and
+     * terminated with a plain {@code >} instead of the XML {@code ?>}.
+     * Any start tag that is still open is closed first.
+     *
+     * @param target the processing instruction target
+     * @param data   the instruction data; omitted (together with the separating
+     *               space) when {@code null} or empty
+     * @throws TransformerException if writing to the underlying writer fails
+     */
+    @Override
+    public void processingInstruction(String target, String data) throws TransformerException {
+        try {
+            closeStartTag(false);
+            final Writer writer = getWriter();
+            // Opening delimiter; without it the output is not a processing instruction.
+            writer.write("<?");
+            writer.write(target);
+            if (data != null && !data.isEmpty()) {
+                writer.write(' ');
+                writer.write(data);
+            }
+            // HTML method: close with '>' only, never '?>'.
+            writer.write('>');
+        } catch (IOException e) {
+            throw new TransformerException(e.getMessage(), e);
+        }
+    }
+
@Override
protected boolean needsEscape(char ch) {
if (RAW_TEXT_ELEMENTS.contains(currentTag)) {
@@ -253,4 +270,20 @@ protected boolean needsEscape(char ch) {
}
return super.needsEscape(ch);
}
+
+    /**
+     * Escaping decision that distinguishes attribute values from text.
+     * Attribute values are always reported as needing escape, even on raw text
+     * elements; text inside a raw text element (script, style) is never escaped;
+     * all remaining text follows the superclass.
+     *
+     * @param ch          the character under consideration
+     * @param inAttribute {@code true} when the character belongs to an attribute value
+     * @return {@code true} if the character must be escaped
+     */
+    @Override
+    protected boolean needsEscape(final char ch, final boolean inAttribute) {
+        if (inAttribute) {
+            // Answer directly so the 1-arg override (which returns false for
+            // all script/style content) is bypassed for attribute values.
+            return true;
+        }
+        if (RAW_TEXT_ELEMENTS.contains(currentTag)) {
+            // Raw text element content is written verbatim.
+            return false;
+        }
+        return super.needsEscape(ch, inAttribute);
+    }
+
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
index e89e7119d19..4894c0162af 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
@@ -24,6 +24,7 @@
import java.io.Writer;
import javax.xml.transform.TransformerException;
+import org.exist.storage.serializers.EXistOutputKeys;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.fastutil.objects.ObjectSet;
@@ -128,7 +129,45 @@ protected void writeDoctype(String rootElement) throws TransformerException {
return;
}
- documentType("html", null, null);
+ // Canonical serialization: never output DOCTYPE
+ final String canonicalProp = outputProperties != null
+ ? outputProperties.getProperty(EXistOutputKeys.CANONICAL) : null;
+ if ("yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp)) {
+ doctypeWritten = true;
+ return;
+ }
+
+ // Only output DOCTYPE when the root element is <html> (case-insensitive)
+ // Per W3C Serialization: DOCTYPE is for the html element only, not fragments
+ final String localName = rootElement.contains(":") ? rootElement.substring(rootElement.indexOf(':') + 1) : rootElement;
+ if (!"html".equalsIgnoreCase(localName)) {
+ doctypeWritten = true; // suppress future attempts
+ return;
+ }
+
+ final String publicId = outputProperties != null
+ ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_PUBLIC) : null;
+ final String systemId = outputProperties != null
+ ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_SYSTEM) : null;
+ final String method = outputProperties != null
+ ? outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD, "xhtml") : "xhtml";
+
+ if ("xhtml".equalsIgnoreCase(method)) {
+ // XHTML: per W3C spec section 5.2, only output doctype-public when
+ // doctype-system is also present
+ if (systemId != null) {
+ documentType("html", publicId, systemId);
+ } else if (publicId == null) {
+ // Neither set — simple DOCTYPE
+ documentType("html", null, null);
+ } else {
+ // doctype-public without doctype-system — suppress DOCTYPE for XHTML
+ doctypeWritten = true;
+ }
+ } else {
+ // HTML method: pass through doctype-public and doctype-system as set
+ documentType("html", publicId, systemId);
+ }
doctypeWritten = true;
}
}
diff --git a/exist-core/src/test/java/org/exist/util/serializer/HTML5FragmentTest.java b/exist-core/src/test/java/org/exist/util/serializer/HTML5FragmentTest.java
new file mode 100644
index 00000000000..f1708e31ea1
--- /dev/null
+++ b/exist-core/src/test/java/org/exist/util/serializer/HTML5FragmentTest.java
@@ -0,0 +1,220 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.util.serializer;
+
+import org.exist.EXistException;
+import org.exist.storage.BrokerPool;
+import org.exist.storage.DBBroker;
+import org.exist.storage.serializers.EXistOutputKeys;
+import org.exist.test.ExistEmbeddedServer;
+import org.exist.security.PermissionDeniedException;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQuery;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+import javax.xml.transform.OutputKeys;
+import java.io.StringWriter;
+import java.util.Properties;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests that HTML5 serialization does not emit DOCTYPE for fragments
+ * (non-html root elements).
+ */
+public class HTML5FragmentTest {
+
+ @ClassRule
+ public static final ExistEmbeddedServer existEmbeddedServer = new ExistEmbeddedServer(true, true);
+
+    /**
+     * Evaluates {@code xquery} and serializes the result through
+     * {@link XQuerySerializer} with indentation switched off, the XML
+     * declaration omitted and XDM serialization enabled.
+     *
+     * @param xquery  the query source to execute
+     * @param method  value of the {@code method} output property
+     * @param version value of the {@code version} output property, or
+     *                {@code null} to leave the property unset
+     * @return the serialized output
+     */
+    private String serialize(final String xquery, final String method, final String version)
+            throws EXistException, XPathException, SAXException, PermissionDeniedException {
+        final BrokerPool pool = existEmbeddedServer.getBrokerPool();
+        try (final DBBroker broker = pool.get(java.util.Optional.empty())) {
+            final XQuery xqueryService = pool.getXQueryService();
+            final Sequence result = xqueryService.execute(broker, xquery, null);
+
+            final Properties props = new Properties();
+            props.setProperty(OutputKeys.METHOD, method);
+            props.setProperty(OutputKeys.INDENT, "no");
+            props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+            if (version != null) {
+                props.setProperty(OutputKeys.VERSION, version);
+            }
+            props.setProperty(EXistOutputKeys.XDM_SERIALIZATION, "yes");
+
+            final StringWriter writer = new StringWriter();
+            final XQuerySerializer serializer = new XQuerySerializer(broker, props, writer);
+            serializer.serialize(result);
+            return writer.toString();
+        }
+    }
+
+ @Test
+ public void htmlDocumentGetsDoctype() throws Exception {
+ final String result = serialize("hello
", "html", "5.0");
+ assertTrue("HTML document should have DOCTYPE: " + result,
+ result.contains(""));
+ }
+
+ @Test
+ public void htmlFragmentNoDoctype() throws Exception {
+ final String result = serialize("hello
", "html", "5.0");
+ assertFalse("HTML fragment should NOT have DOCTYPE: " + result,
+ result.contains("hello
"));
+ }
+
+ @Test
+ public void htmlFragmentDivNoDoctype() throws Exception {
+ final String result = serialize("text
", "html", "5.0");
+ assertFalse("HTML div fragment should NOT have DOCTYPE: " + result,
+ result.contains("item", "html", "5.0");
+ assertFalse("HTML li fragment should NOT have DOCTYPE: " + result,
+ result.contains("hello
",
+ "xhtml", "5.0");
+ assertTrue("XHTML document should have DOCTYPE: " + result,
+ result.contains(""));
+ }
+
+ @Test
+ public void xhtmlFragmentNoDoctype() throws Exception {
+ final String result = serialize(
+ "hello
",
+ "xhtml", "5.0");
+ assertFalse("XHTML fragment should NOT have DOCTYPE: " + result,
+ result.contains("";
+ final Sequence result = xqueryService.execute(broker, xquery, null);
+
+ final Properties props = new Properties();
+ props.setProperty(OutputKeys.METHOD, "html");
+ props.setProperty(OutputKeys.INDENT, "yes");
+ props.setProperty(OutputKeys.VERSION, "5.0");
+ props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+ props.setProperty("suppress-indentation", "li td");
+ props.setProperty(EXistOutputKeys.XDM_SERIALIZATION, "yes");
+
+ final StringWriter writer = new StringWriter();
+ final XQuerySerializer serializer = new XQuerySerializer(broker, props, writer);
+ serializer.serialize(result);
+ final String output = writer.toString();
+
+ // li should NOT have indentation inside it
+ assertTrue("li content should not be indented: " + output,
+ output.contains("One
"));
+ }
+ }
+
+ @Test
+ public void htmlSuppressIndentationViaFnSerialize() throws Exception {
+ final BrokerPool pool = existEmbeddedServer.getBrokerPool();
+ try (final DBBroker broker = pool.get(java.util.Optional.empty())) {
+ final XQuery xqueryService = pool.getXQueryService();
+ // Use fn:serialize with suppress-indentation — pass QNames, not string
+ final String xquery =
+ "serialize(, " +
+ "map { 'method': 'html', 'indent': true(), 'version': '5.0', " +
+ "'suppress-indentation': (xs:QName('li'), xs:QName('td')) })";
+ final Sequence result = xqueryService.execute(broker, xquery, null);
+ final String output = result.getStringValue();
+
+ // li should NOT have indentation inside it
+ assertTrue("li content should not be indented via fn:serialize: " + output,
+ output.contains("One
"));
+ }
+ }
+
+ @Test
+ public void htmlCdataSectionElementsSuppressed() throws Exception {
+ // For HTML method, cdata-section-elements should be IGNORED
+ // Text should not be wrapped in CDATA markers
+ final BrokerPool pool = existEmbeddedServer.getBrokerPool();
+ try (final DBBroker broker = pool.get(java.util.Optional.empty())) {
+ final XQuery xqueryService = pool.getXQueryService();
+ final String xquery = "No CDATA
";
+ final Sequence result = xqueryService.execute(broker, xquery, null);
+
+ final Properties props = new Properties();
+ props.setProperty(OutputKeys.METHOD, "html");
+ props.setProperty(OutputKeys.INDENT, "no");
+ props.setProperty(OutputKeys.VERSION, "5.0");
+ props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+ props.setProperty(OutputKeys.CDATA_SECTION_ELEMENTS, "b");
+ props.setProperty(EXistOutputKeys.XDM_SERIALIZATION, "yes");
+
+ final StringWriter writer = new StringWriter();
+ final XQuerySerializer serializer = new XQuerySerializer(broker, props, writer);
+ serializer.serialize(result);
+ final String output = writer.toString();
+
+ assertFalse("HTML output should not contain CDATA: " + output,
+ output.contains("No CDATA"));
+ }
+ }
+
+ @Test
+ public void htmlScriptAttributeEscaped() throws Exception {
+ // In HTML5, attributes on script elements MUST be escaped
+ // but text content inside script elements must NOT be escaped
+ final String result = serialize("hello
", "html", "4.0");
+ assertFalse("HTML 4.0 without public/system should NOT have DOCTYPE: " + result,
+ result.contains("\n ";
+ final String expected = " ";
final QName elQName = new QName("input");
writer.startElement(elQName);
writer.attribute("checked", "checked");
@@ -54,7 +54,7 @@ public void testAttributeWithBooleanValue() throws Exception {
@Test
public void testAttributeWithNonBooleanValue() throws Exception {
- final String expected = "\n ";
+ final String expected = " ";
final QName elQName = new QName("input");
writer.startElement(elQName);
writer.attribute("name", "name");
@@ -66,7 +66,7 @@ public void testAttributeWithNonBooleanValue() throws Exception {
@Test
public void testAttributeQNameWithBooleanValue() throws Exception {
- final String expected = "\n ";
+ final String expected = " ";
final QName elQName = new QName("input");
final QName attrQName = new QName("checked");
writer.startElement(elQName);
@@ -79,7 +79,7 @@ public void testAttributeQNameWithBooleanValue() throws Exception {
@Test
public void testAttributeQNameWithNonBooleanValue() throws Exception {
- final String expected = "\n ";
+ final String expected = " ";
final QName elQName = new QName("input");
final QName attrQName = new QName("name");
writer.startElement(elQName);
diff --git a/exist-core/src/test/xquery/xquery3/serialize.xql b/exist-core/src/test/xquery/xquery3/serialize.xql
index bea438d425f..c5cd35d1f6c 100644
--- a/exist-core/src/test/xquery/xquery3/serialize.xql
+++ b/exist-core/src/test/xquery/xquery3/serialize.xql
@@ -847,7 +847,7 @@ function ser:serialize-xml-134() {
};
declare
- %test:assertEquals(' ')
+ %test:assertEquals(' ')
function ser:serialize-html-5-boolean-attribute-names() {
=> serialize($ser:opt-map-html5)
@@ -855,7 +855,7 @@ function ser:serialize-html-5-boolean-attribute-names() {
};
declare
- %test:assertEquals(' ')
+ %test:assertEquals(' ')
function ser:serialize-html-5-empty-tags() {
=> serialize($ser:opt-map-html5)
@@ -876,7 +876,7 @@ function ser:serialize-html-5-raw-text-elements-body() {
};
declare
- %test:assertEquals(' ')
+ %test:assertEquals(' ')
function ser:serialize-html-5-raw-text-elements-head() {
@@ -890,7 +890,7 @@ function ser:serialize-html-5-raw-text-elements-head() {
};
declare
- %test:assertEquals(' XML > JSON ')
+ %test:assertEquals(' XML > JSON ')
function ser:serialize-html-5-needs-escape-elements() {
@@ -952,3 +952,59 @@ declare
function ser:item-separator-applies-to-array-members() {
serialize([1,2], map { "item-separator": "|" })
};
+
+declare
+ %test:assertTrue
+function ser:cdata-section-elements-no-namespace() {
+ (: Simple unprefixed CDATA test :)
+ let $result := serialize(
+ bold italic ,
+ map {
+ "method": "xml",
+ "cdata-section-elements": QName("", "b"),
+ "omit-xml-declaration": true()
+ }
+ )
+ return contains($result, "CDATA[bold]") and not(contains($result, "CDATA[italic]"))
+};
+
+declare
+ %test:assertTrue
+function ser:cdata-section-elements-with-namespace() {
+ (: Namespaced CDATA test :)
+ let $result := serialize(
+ BOLD ITALIC ,
+ map {
+ "method": "xml",
+ "cdata-section-elements": QName("http://www.example.org/ns/p", "b"),
+ "omit-xml-declaration": true()
+ }
+ )
+ return contains($result, "CDATA[BOLD]") and not(contains($result, "CDATA[ITALIC]"))
+};
+
+declare
+ %test:assertEquals('1|2|3')
+function ser:item-separator-with-atomics() {
+ (: Atomic items joined by item-separator :)
+ serialize(
+ (1, 2, 3),
+ map { "method": "xml", "item-separator": "|", "omit-xml-declaration": true() }
+ )
+};
+
+declare
+ %test:assertTrue
+function ser:cdata-section-elements-combined() {
+ (: Combined: both unprefixed and namespaced elements get CDATA :)
+ let $result := serialize(
+ bold italic BOLD ITALIC ,
+ map {
+ "method": "xml",
+ "cdata-section-elements": (QName("", "b"), QName("http://www.example.org/ns/p", "b")),
+ "omit-xml-declaration": true()
+ }
+ )
+ return contains($result, "CDATA[bold]") and contains($result, "CDATA[BOLD]")
+ and not(contains($result, "CDATA[italic]")) and not(contains($result, "CDATA[ITALIC]"))
+};
From b3dc16e1b6c395934729892a325119e869d57c4b Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:23:55 -0400
Subject: [PATCH 05/11] [feature] Improve JSON and adaptive serialization for
W3C compliance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
JSONSerializer:
- SERE0020: Reject INF/NaN in JSON serialization
- SERE0021: Reject function items
- SERE0022: Detect duplicate map keys
- SERE0023: Reject multi-item sequences
- escape-solidus parameter, json-lines parameter
- Canonical JSON (RFC 8785): sorted keys, canonical double format
- Character maps: apply use-character-maps to JSON string output
- Respect indent-spaces for JSON indentation
AdaptiveWriter:
- Fix map output: map{ not map { (spec compliance)
- Fix INF/NaN handling in adaptive double output
FunXmlToJson:
- Rewrite to DOM-based element conversion
- Better handling of element vs document nodes
Spec: W3C Serialization 3.1 §9 (JSON Output Method),
RFC 8785 (JSON Canonicalization Scheme)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../exist/util/serializer/AdaptiveWriter.java | 17 +-
.../org/exist/util/serializer/XMLWriter.java | 6 +-
.../util/serializer/json/JSONSerializer.java | 253 ++++++++++++++++--
.../xquery/functions/fn/FunXmlToJson.java | 154 +++++++++++
.../xquery3/fnSerializeCharacterMaps.xqm | 59 ++++
5 files changed, 457 insertions(+), 32 deletions(-)
diff --git a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
index 22ab6dfca23..717ec83ab07 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
@@ -190,10 +190,15 @@ private void writeAtomic(AtomicValue value) throws IOException, SAXException, XP
}
private void writeDouble(final DoubleValue item) throws SAXException {
- final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US);
- symbols.setExponentSeparator("e");
- final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols);
- writeText(df.format(item.getDouble()));
+ final double d = item.getDouble();
+ if (Double.isInfinite(d) || Double.isNaN(d)) {
+ writeText(item.getStringValue());
+ } else {
+ final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US);
+ symbols.setExponentSeparator("e");
+ final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols);
+ writeText(df.format(d));
+ }
}
private void writeArray(final ArrayType array) throws XPathException, SAXException, TransformerException {
@@ -215,9 +220,7 @@ private void writeArray(final ArrayType array) throws XPathException, SAXExcepti
private void writeMap(final AbstractMapType map) throws SAXException, XPathException, TransformerException {
try {
- writer.write("map");
- addSpaceIfIndent();
- writer.write('{');
+ writer.write("map{");
addIndent();
indent();
for (final Iterator> i = map.iterator(); i.hasNext(); ) {
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
index 50e618eddb6..32b408aebb7 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
@@ -322,7 +322,7 @@ public void endElement(final QName qname) throws TransformerException {
}
public void namespace(final String prefix, final String nsURI) throws TransformerException {
- if((nsURI == null || nsURI.isEmpty()) && (prefix == null || prefix.isEmpty())) {
+ if((nsURI == null) && (prefix == null || prefix.isEmpty())) {
return;
}
@@ -333,6 +333,10 @@ public void namespace(final String prefix, final String nsURI) throws Transforme
try {
if(!tagIsOpen) {
+ // Empty default namespace outside a start tag is harmless — just skip it
+ if ((nsURI == null || nsURI.isEmpty()) && (prefix == null || prefix.isEmpty())) {
+ return;
+ }
throw new TransformerException("Found a namespace declaration outside an element");
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
index bd1f01a9454..2f6d6b6558b 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
@@ -23,53 +23,93 @@
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.json.JsonWriteFeature;
import io.lacuna.bifurcan.IEntry;
+import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import org.exist.storage.DBBroker;
import org.exist.storage.serializers.EXistOutputKeys;
import org.exist.storage.serializers.Serializer;
+import org.exist.xquery.ErrorCodes;
import org.exist.xquery.XPathException;
import org.exist.xquery.functions.array.ArrayType;
import org.exist.xquery.functions.map.MapType;
+import org.exist.xquery.util.SerializerUtils;
import org.exist.xquery.value.*;
import org.xml.sax.SAXException;
+import javax.annotation.Nullable;
import javax.xml.transform.OutputKeys;
import java.io.IOException;
import java.io.Writer;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
import java.util.Properties;
+import java.util.Set;
/**
* Called by {@link org.exist.util.serializer.XQuerySerializer} to serialize an XQuery sequence
* to JSON. The JSON serializer differs from other serialization methods because it maps XQuery
* data items to JSON.
*
+ * Per W3C XSLT and XQuery Serialization 3.1 Section 9 (JSON Output Method).
+ *
* @author Wolf
*/
public class JSONSerializer {
private final DBBroker broker;
private final Properties outputProperties;
+ private final boolean allowDuplicateNames;
+ private final boolean canonical;
+ @Nullable private final Int2ObjectMap characterMap;
public JSONSerializer(DBBroker broker, Properties outputProperties) {
super();
this.broker = broker;
this.outputProperties = outputProperties;
+ final String canonicalProp = outputProperties.getProperty(EXistOutputKeys.CANONICAL);
+ this.canonical = isBooleanTrue(canonicalProp);
+ // Canonical mode: always reject duplicate keys
+ this.allowDuplicateNames = !canonical && "yes".equals(
+ outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"));
+ this.characterMap = SerializerUtils.getCharacterMap(outputProperties);
}
public void serialize(Sequence sequence, Writer writer) throws SAXException {
- JsonFactory factory = new JsonFactory();
+ // QT4: escape-solidus controls whether / is escaped as \/
+ // Default is "no" for XQ 3.1 compatibility (parameter doesn't exist in 3.1 spec)
+ // Canonical JSON (RFC 8785): solidus is NOT escaped
+ final boolean escapeSolidus = !canonical && isBooleanTrue(
+ outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "no"));
+ final JsonFactory factory = JsonFactory.builder()
+ .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus)
+ .build();
try {
JsonGenerator generator = factory.createGenerator(writer);
generator.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET);
- if ("yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"))) {
- generator.useDefaultPrettyPrinter();
+ if (isBooleanTrue(outputProperties.getProperty(OutputKeys.INDENT, "no"))) {
+ final int indentSpaces = Integer.parseInt(
+ outputProperties.getProperty(EXistOutputKeys.INDENT_SPACES, "4"));
+ final com.fasterxml.jackson.core.util.DefaultPrettyPrinter pp =
+ new com.fasterxml.jackson.core.util.DefaultPrettyPrinter();
+ pp.indentArraysWith(
+ com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent(
+ " ".repeat(indentSpaces)));
+ pp.indentObjectsWith(
+ com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent(
+ " ".repeat(indentSpaces)));
+ generator.setPrettyPrinter(pp);
}
- if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"))) {
- generator.enable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
+ // Duplicate detection is handled manually in serializeMap for proper SERE0022 errors
+ generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
+ final boolean jsonLines = isBooleanTrue(
+ outputProperties.getProperty(EXistOutputKeys.JSON_LINES, "no"));
+ if (jsonLines) {
+ serializeJsonLines(sequence, generator);
} else {
- generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
+ serializeSequence(sequence, generator);
}
- serializeSequence(sequence, generator);
if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.INSERT_FINAL_NEWLINE, "no"))) {
generator.writeRaw('\n');
}
@@ -79,12 +119,55 @@ public void serialize(Sequence sequence, Writer writer) throws SAXException {
}
}
+    /**
+     * JSON Lines format (NDJSON): one JSON value per line, no array wrapper.
+     * Per QT4 Serialization 4.0, when json-lines=true.
+     * <p>
+     * Jackson inserts separator whitespace between root-level values, so each
+     * item is rendered by its own short-lived generator into a string and the
+     * strings are written raw to {@code generator}, joined by a single newline.
+     * An empty sequence produces no output.
+     * <p>
+     * Solidus escaping uses the same rule as {@code serialize}: it is off unless
+     * escape-solidus is true, and always off in canonical mode.
+     *
+     * @param sequence  the items to write, one per line
+     * @param generator the target generator that receives the raw lines
+     */
+    private void serializeJsonLines(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException {
+        if (sequence.isEmpty()) {
+            return;
+        }
+        final boolean escapeSolidus = !canonical && isBooleanTrue(
+                outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "no"));
+        // The factory configuration does not change per item, so build it once.
+        final JsonFactory lineFactory = JsonFactory.builder()
+                .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus)
+                .build();
+        boolean first = true;
+        for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
+            if (!first) {
+                generator.writeRaw('\n');
+            }
+            // Serialize this item to a standalone string; the generator is
+            // closed (and thereby flushed) even if the item cannot be serialized.
+            final java.io.StringWriter lineWriter = new java.io.StringWriter();
+            try (JsonGenerator lineGen = lineFactory.createGenerator(lineWriter)) {
+                lineGen.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET);
+                serializeItem(i.nextItem(), lineGen);
+            }
+            // Write the line's JSON as raw content to avoid Jackson's root separator
+            generator.writeRaw(lineWriter.toString());
+            first = false;
+        }
+    }
+
private void serializeSequence(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException {
+ serializeSequence(sequence, generator, false);
+ }
+
+ private void serializeSequence(Sequence sequence, JsonGenerator generator, boolean allowMultiItem) throws IOException, XPathException, SAXException {
if (sequence.isEmpty()) {
generator.writeNull();
} else if (sequence.hasOne() && "no".equals(outputProperties.getProperty(EXistOutputKeys.JSON_ARRAY_OUTPUT, "no"))) {
serializeItem(sequence.itemAt(0), generator);
+ } else if (!allowMultiItem) {
+ // SERE0023: JSON output method cannot serialize a sequence of more than one item
+ // at the top level or as a map entry value
+ throw new SAXException("err:SERE0023 Sequence of " + sequence.getItemCount()
+ + " items cannot be serialized using the JSON output method");
} else {
+ // Inside arrays, multi-item sequences become JSON arrays
generator.writeStartArray();
for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
serializeItem(i.nextItem(), generator);
@@ -99,23 +182,111 @@ private void serializeItem(Item item, JsonGenerator generator) throws IOExceptio
} else if (item.getType() == Type.MAP_ITEM) {
serializeMap((MapType) item, generator);
} else if (Type.subTypeOf(item.getType(), Type.ANY_ATOMIC_TYPE)) {
- if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) {
- generator.writeNumber(item.getStringValue());
- } else {
- switch (item.getType()) {
- case Type.BOOLEAN:
- generator.writeBoolean(((AtomicValue)item).effectiveBooleanValue());
- break;
- default:
- generator.writeString(item.getStringValue());
- break;
- }
- }
+ serializeAtomicValue(item, generator);
} else if (Type.subTypeOf(item.getType(), Type.NODE)) {
serializeNode(item, generator);
+ } else if (Type.subTypeOf(item.getType(), Type.FUNCTION)) {
+ throw new SAXException("err:SERE0021 Sequence contains a function item, which cannot be serialized as JSON");
}
}
+    /**
+     * Writes one atomic value as a JSON number, boolean or string.
+     * <ul>
+     *   <li>Numeric values in canonical mode are converted to double and written
+     *       in the format produced by {@code canonicalDoubleString}.</li>
+     *   <li>Other numeric values are written using their string value.</li>
+     *   <li>{@code xs:boolean} values are written as JSON booleans.</li>
+     *   <li>Everything else is written as a JSON string, after character map
+     *       substitution.</li>
+     * </ul>
+     *
+     * @param item      the atomic item to write
+     * @param generator the generator that receives the value
+     * @throws SAXException with code err:SERE0020 when the value is NaN, INF or -INF
+     */
+    private void serializeAtomicValue(Item item, JsonGenerator generator) throws IOException, XPathException, SAXException {
+        if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) {
+            if (canonical) {
+                // RFC 8785: cast to double, use shortest representation
+                final double d = ((org.exist.xquery.value.NumericValue) item).getDouble();
+                if (!Double.isFinite(d)) {
+                    throw new SAXException("err:SERE0020 Numeric value " + item.getStringValue()
+                            + " cannot be serialized in canonical JSON");
+                }
+                generator.writeRawValue(canonicalDoubleString(d));
+                return;
+            }
+            final String stringValue = item.getStringValue();
+            // W3C Serialization 3.1: INF, -INF, and NaN MUST raise SERE0020
+            if ("NaN".equals(stringValue) || "INF".equals(stringValue) || "-INF".equals(stringValue)) {
+                throw new SAXException("err:SERE0020 Numeric value " + stringValue
+                        + " cannot be serialized as JSON");
+            }
+            // Negative zero needs no special case: it is written unchanged as -0
+            // (QT4 allows either 0 or -0).
+            generator.writeNumber(stringValue);
+        } else if (item.getType() == Type.BOOLEAN) {
+            generator.writeBoolean(((AtomicValue) item).effectiveBooleanValue());
+        } else {
+            writeStringWithCharMap(generator, item.getStringValue());
+        }
+    }
+
+    /**
+     * Formats a finite double for canonical JSON (RFC 8785).
+     * Zero (of either sign) becomes {@code "0"} and the smallest subnormal
+     * magnitudes are special-cased as {@code "5e-324"} / {@code "-5e-324"}.
+     * Magnitudes in [1e-6, 1e21) are written in plain decimal notation without
+     * trailing zeros; all others use exponential notation with a lowercase 'e'.
+     *
+     * @param value the number to format
+     * @return the canonical text form
+     */
+    private static String canonicalDoubleString(final double value) {
+        if (value == 0) {
+            return "0";
+        }
+        if (value == Double.MIN_VALUE) {
+            return "5e-324";
+        }
+        if (value == -Double.MIN_VALUE) {
+            return "-5e-324";
+        }
+
+        final java.math.BigDecimal decimal = java.math.BigDecimal.valueOf(value).stripTrailingZeros();
+        final double magnitude = Math.abs(value);
+        final boolean plainNotation = magnitude >= 1e-6 && magnitude < 1e21;
+        return plainNotation
+                ? decimal.toPlainString()
+                : decimal.toString().replace('E', 'e');
+    }
+
+ /**
+ * Apply use-character-maps substitutions to a string value.
+ * Every code point that has an entry in the character map is replaced by the
+ * mapped string; all other code points are copied unchanged. This method only
+ * builds the substituted string and performs no JSON escaping itself, so
+ * whether the replacements end up escaped depends on how the caller writes
+ * the returned value.
+ *
+ * @param value the string to transform
+ * @return {@code value} itself when no character map is configured (null or
+ *         empty), otherwise a new string with the substitutions applied
+ */
+ private String applyCharacterMap(final String value) {
+ if (characterMap == null || characterMap.isEmpty()) {
+ return value;
+ }
+ final StringBuilder sb = new StringBuilder(value.length());
+ // Walk the string by code point so that a surrogate pair is looked up as a single key.
+ for (int i = 0; i < value.length(); ) {
+ final int cp = value.codePointAt(i);
+ i += Character.charCount(cp);
+ final String replacement = characterMap.get(cp);
+ if (replacement != null) {
+ sb.append(replacement);
+ } else {
+ sb.appendCodePoint(cp);
+ }
+ }
+ return sb.toString();
+ }
+
+    /**
+     * Emits {@code value} as a JSON string. When a non-empty character map is
+     * configured the value is first run through {@code applyCharacterMap}; in
+     * either case the text goes through {@code writeString}, so Jackson takes
+     * care of quoting, structural separators and escaping.
+     *
+     * @param generator the generator that receives the string
+     * @param value     the text to write
+     * @throws IOException if the generator fails to write
+     */
+    private void writeStringWithCharMap(final JsonGenerator generator, final String value) throws IOException {
+        final boolean noMapping = characterMap == null || characterMap.isEmpty();
+        final String text = noMapping ? value : applyCharacterMap(value);
+        generator.writeString(text);
+    }
+
+    /**
+     * Recognizes the accepted spellings of boolean true for a serialization
+     * parameter: {@code yes}, {@code true} or {@code 1}, ignoring surrounding
+     * whitespace. A {@code null} value is not true.
+     *
+     * @param value the raw parameter value, may be {@code null}
+     * @return {@code true} only for one of the accepted spellings
+     */
+    private static boolean isBooleanTrue(final String value) {
+        if (value == null) {
+            return false;
+        }
+        switch (value.trim()) {
+            case "yes":
+            case "true":
+            case "1":
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Recognizes the accepted spellings of boolean false for a serialization
+     * parameter: {@code no}, {@code false} or {@code 0}, ignoring surrounding
+     * whitespace. A {@code null} value is not false.
+     *
+     * @param value the raw parameter value, may be {@code null}
+     * @return {@code true} only for one of the accepted spellings
+     */
+    private static boolean isBooleanFalse(final String value) {
+        if (value == null) {
+            return false;
+        }
+        switch (value.trim()) {
+            case "no":
+            case "false":
+            case "0":
+                return true;
+            default:
+                return false;
+        }
+    }
+
private void serializeNode(Item item, JsonGenerator generator) throws SAXException {
final Serializer serializer = broker.borrowSerializer();
final Properties xmlOutput = new Properties();
@@ -124,7 +295,7 @@ private void serializeNode(Item item, JsonGenerator generator) throws SAXExcepti
xmlOutput.setProperty(OutputKeys.INDENT, outputProperties.getProperty(OutputKeys.INDENT, "no"));
try {
serializer.setProperties(xmlOutput);
- generator.writeString(serializer.serialize((NodeValue)item));
+ writeStringWithCharMap(generator, serializer.serialize((NodeValue)item));
} catch (IOException e) {
throw new SAXException(e.getMessage(), e);
} finally {
@@ -136,16 +307,50 @@ private void serializeArray(ArrayType array, JsonGenerator generator) throws IOE
generator.writeStartArray();
for (int i = 0; i < array.getSize(); i++) {
final Sequence member = array.get(i);
- serializeSequence(member, generator);
+ // W3C Serialization 3.1: multi-item sequences within arrays raise SERE0023
+ if (member.getItemCount() > 1) {
+ throw new SAXException("err:SERE0023 Array member at position " + (i + 1)
+ + " is a sequence of " + member.getItemCount() + " items");
+ }
+ serializeSequence(member, generator, false);
}
generator.writeEndArray();
}
private void serializeMap(MapType map, JsonGenerator generator) throws IOException, XPathException, SAXException {
generator.writeStartObject();
- for (final IEntry entry: map) {
- generator.writeFieldName(entry.key().getStringValue());
- serializeSequence(entry.value(), generator);
+ // Key strings already written; null when allow-duplicate-names permits repeats
+ final Set<String> seenKeys = allowDuplicateNames ? null : new HashSet<>();
+
+ // Resolve every key to its string form once, up front. Doing it here (rather than
+ // inside a comparator) lets an XPathException propagate to the caller instead of
+ // being swallowed, and avoids recomputing the string on every comparison.
+ final List<java.util.Map.Entry<String, Sequence>> entries = new ArrayList<>();
+ for (final IEntry<AtomicValue, Sequence> entry : map) {
+ entries.add(new java.util.AbstractMap.SimpleImmutableEntry<>(entry.key().getStringValue(), entry.value()));
+ }
+
+ // Canonical JSON (RFC 8785): sort keys by UTF-16 code unit order, which is the
+ // ordering String.compareTo implements. List.sort is stable, so entries whose
+ // keys share a string value keep their iteration order.
+ if (canonical) {
+ entries.sort(java.util.Map.Entry.comparingByKey());
+ }
+
+ for (final java.util.Map.Entry<String, Sequence> entry : entries) {
+ final String key = entry.getKey();
+ if (seenKeys != null && !seenKeys.add(key)) {
+ throw new SAXException("err:SERE0022 Duplicate key '" + key + "' in map and allow-duplicate-names is 'no'");
+ }
+ generator.writeFieldName(key);
+ serializeSequence(entry.getValue(), generator, false);
}
generator.writeEndObject();
}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java
index 76d108a3151..44f346f64ab 100644
--- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java
@@ -37,6 +37,7 @@
import java.io.Writer;
import java.math.BigDecimal;
import java.util.ArrayList;
+import java.util.Set;
import static org.exist.xquery.FunctionDSL.*;
@@ -100,7 +101,160 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
* @param writer the Writer to be used
* @throws XPathException on error in XML JSON input according to specification
*/
+ // Local names of the elements allowed in the XML representation of JSON
+ private static final Set<String> JSON_ELEMENT_NAMES = Set.of("map", "array", "null", "boolean", "number", "string");
+
private void nodeValueToJson(final NodeValue nodeValue, final Writer writer) throws XPathException {
+ // If the input is an element node (not a document), use DOM-based conversion
+ // to avoid XMLStreamReader traversing the entire owner document
+ if (nodeValue.getType() == Type.ELEMENT) {
+ elementToJson(nodeValue, writer);
+ return;
+ }
+
+ // Every other node type is handled as a document: its first element child is converted
+ documentToJson(nodeValue, writer);
+ }
+
+ /**
+ * Convert a document node to JSON by locating its first element child and
+ * writing that element.
+ *
+ * @param nodeValue the document node to convert
+ * @param writer the Writer to be used
+ * @throws XPathException FOJS0006 if the node has no element child, or on invalid JSON-XML input
+ */
+ private void documentToJson(final NodeValue nodeValue, final Writer writer) throws XPathException {
+ org.w3c.dom.Node child = nodeValue.getNode().getFirstChild();
+ while (child != null && child.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
+ child = child.getNextSibling();
+ }
+ if (child == null) {
+ throw new XPathException(this, ErrorCodes.FOJS0006, "Invalid XML representation of JSON. Document has no element child.");
+ }
+ // Convert the DOM element directly: a DOM child is not guaranteed to implement
+ // NodeValue as well, so casting it to NodeValue could fail with a ClassCastException
+ domElementToJson((org.w3c.dom.Element) child, writer);
+ }
+
+ /**
+ * Convert an element node to JSON.
+ *
+ * @param nodeValue the element node to convert
+ * @param writer the Writer to be used
+ * @throws XPathException on invalid JSON-XML input or when writing fails
+ */
+ private void elementToJson(final NodeValue nodeValue, final Writer writer) throws XPathException {
+ domElementToJson((org.w3c.dom.Element) nodeValue.getNode(), writer);
+ }
+
+ /**
+ * Write a DOM element through a fresh JSON generator, which is closed when done.
+ * I/O failures are reported as FOER0000 with the original exception as cause.
+ */
+ private void domElementToJson(final org.w3c.dom.Element element, final Writer writer) throws XPathException {
+ final JsonFactory jsonFactory = new JsonFactory();
+ try (final JsonGenerator jsonGenerator = jsonFactory.createGenerator(writer)) {
+ writeJsonElement(element, jsonGenerator);
+ } catch (final IOException e) {
+ throw new XPathException(this, ErrorCodes.FOER0000, e.getMessage(), e);
+ }
+ }
+
+ /**
+ * Recursively write one element of the XML representation of JSON to the generator.
+ * The element is dispatched on its local name (tag name when the local name is null).
+ *
+ * @param element the map, array, string, number, boolean or null element
+ * @param gen the JSON generator to write to
+ * @throws XPathException FOJS0006 for an unknown element, a map entry without a key,
+ * a duplicate key, an unparsable number or a non-empty null;
+ * FOJS0007 for a bad escape sequence in an escaped string
+ * @throws IOException if the generator fails to write
+ */
+ private void writeJsonElement(final org.w3c.dom.Element element, final JsonGenerator gen) throws XPathException, IOException {
+ final String localName = element.getLocalName() != null ? element.getLocalName() : element.getTagName();
+
+ if (!JSON_ELEMENT_NAMES.contains(localName)) {
+ throw new XPathException(this, ErrorCodes.FOJS0006,
+ "Invalid XML representation of JSON. Found XML element which is not one of [map, array, null, boolean, number, string].");
+ }
+
+ switch (localName) {
+ case "map":
+ gen.writeStartObject();
+ final org.w3c.dom.NodeList mapChildren = element.getChildNodes();
+ final java.util.Set<String> seenKeys = new java.util.HashSet<>();
+ for (int i = 0; i < mapChildren.getLength(); i++) {
+ final org.w3c.dom.Node child = mapChildren.item(i);
+ // Only element children are entries; text, comments etc. are skipped
+ if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
+ final org.w3c.dom.Element childElem = (org.w3c.dom.Element) child;
+ final String keyValue = getKeyAttribute(childElem);
+ if (keyValue == null) {
+ throw new XPathException(this, ErrorCodes.FOJS0006,
+ "Invalid XML representation of JSON. Map entry missing 'key' attribute.");
+ }
+ if (!seenKeys.add(keyValue)) {
+ throw new XPathException(this, ErrorCodes.FOJS0006,
+ "Invalid XML representation of JSON. Duplicate key '" + keyValue + "' in map.");
+ }
+ gen.writeFieldName(keyValue);
+ writeJsonElement(childElem, gen);
+ }
+ }
+ gen.writeEndObject();
+ break;
+
+ case "array":
+ gen.writeStartArray();
+ final org.w3c.dom.NodeList arrayChildren = element.getChildNodes();
+ for (int i = 0; i < arrayChildren.getLength(); i++) {
+ final org.w3c.dom.Node child = arrayChildren.item(i);
+ if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
+ writeJsonElement((org.w3c.dom.Element) child, gen);
+ }
+ }
+ gen.writeEndArray();
+ break;
+
+ case "string":
+ final String strContent = getTextContent(element);
+ // 'escaped' is an xs:boolean attribute, so "1" means true just like "true"
+ final String escapedAttr = element.getAttribute("escaped");
+ if ("true".equals(escapedAttr) || "1".equals(escapedAttr)) {
+ final String unescaped;
+ try {
+ unescaped = unescapeEscapedJsonString(strContent);
+ } catch (final IOException e) {
+ throw new XPathException(this, ErrorCodes.FOJS0007, "Bad JSON escape sequence.", e);
+ }
+ // Written outside the try so a genuine output failure is not misreported as FOJS0007
+ gen.writeString(unescaped);
+ } else {
+ gen.writeString(strContent);
+ }
+ break;
+
+ case "number":
+ // Surrounding whitespace is insignificant in a numeric lexical form
+ final String numStr = getTextContent(element).trim();
+ try {
+ gen.writeNumber(new java.math.BigDecimal(numStr));
+ } catch (final NumberFormatException e) {
+ throw new XPathException(this, ErrorCodes.FOJS0006, "Cannot convert '" + numStr + "' to a number.");
+ }
+ break;
+
+ case "boolean":
+ final String boolStr = getTextContent(element);
+ // Lenient: only "0", "false" and empty content are false; any other content is written as true
+ final boolean boolVal = !("0".equals(boolStr) || "false".equals(boolStr) || boolStr.isEmpty());
+ gen.writeBoolean(boolVal);
+ break;
+
+ case "null":
+ final String nullContent = getTextContent(element);
+ if (!nullContent.isEmpty()) {
+ throw new XPathException(this, ErrorCodes.FOJS0006,
+ "Invalid XML representation of JSON. Found non-empty XML null element.");
+ }
+ gen.writeNull();
+ break;
+ }
+ }
+
+ /**
+ * Read the JSON object key carried by a map entry element.
+ *
+ * @param element the child element of a map
+ * @return the key, unescaped when escaped-key is true, or null if there is no 'key' attribute
+ * @throws XPathException FOJS0007 if an escaped key contains a bad escape sequence
+ */
+ private String getKeyAttribute(final org.w3c.dom.Element element) throws XPathException {
+ // getAttribute returns "" for missing attributes, so check hasAttribute
+ if (!element.hasAttribute("key")) {
+ return null;
+ }
+ final String key = element.getAttribute("key");
+ // 'escaped-key' is an xs:boolean attribute, so "1" means true just like "true"
+ final String escapedKey = element.getAttribute("escaped-key");
+ if ("true".equals(escapedKey) || "1".equals(escapedKey)) {
+ try {
+ return unescapeEscapedJsonString(key);
+ } catch (final IOException e) {
+ throw new XPathException(this, ErrorCodes.FOJS0007, "Bad JSON escape sequence in key.", e);
+ }
+ }
+ return key;
+ }
+
+ /**
+ * Concatenate the text of an element's direct text and CDATA children.
+ * Other child node types (elements, comments, ...) are ignored and not descended into.
+ *
+ * @param element the element to read
+ * @return the concatenated text, or the empty string if there are no such children
+ */
+ private String getTextContent(final org.w3c.dom.Element element) {
+ final StringBuilder sb = new StringBuilder();
+ final org.w3c.dom.NodeList children = element.getChildNodes();
+ for (int i = 0; i < children.getLength(); i++) {
+ final org.w3c.dom.Node child = children.item(i);
+ if (child.getNodeType() == org.w3c.dom.Node.TEXT_NODE
+ || child.getNodeType() == org.w3c.dom.Node.CDATA_SECTION_NODE) {
+ sb.append(child.getTextContent());
+ }
+ }
+ return sb.toString();
+ }
+
+ // Keep the old XMLStreamReader-based method for reference but it's no longer called
+ @SuppressWarnings("unused")
+ private void nodeValueToJsonViaStream(final NodeValue nodeValue, final Writer writer) throws XPathException {
final StringBuilder tempStringBuilder = new StringBuilder();
final JsonFactory jsonFactory = new JsonFactory();
final Integer stackSeparator = 0;
diff --git a/exist-core/src/test/xquery/xquery3/fnSerializeCharacterMaps.xqm b/exist-core/src/test/xquery/xquery3/fnSerializeCharacterMaps.xqm
index e971e7a5a93..64fd0d5267e 100644
--- a/exist-core/src/test/xquery/xquery3/fnSerializeCharacterMaps.xqm
+++ b/exist-core/src/test/xquery/xquery3/fnSerializeCharacterMaps.xqm
@@ -59,3 +59,62 @@ function testSerialize:use_character_maps-032-params-as-map() {
let $result := serialize($testSerialize:atomic, $params)
return contains($result, "foo:a$$name")
};
+
+(: JSON serialization with use-character-maps :)
+
+declare
+ %test:assertEquals('{"name":"hello ©orld"}')
+function testSerialize:json_character_map_string() {
+ let $params := map {
+ "method": "json",
+ "use-character-maps": map { "w": "©" }
+ }
+ return serialize(map { "name": "hello world" }, $params)
+};
+
+declare
+ %test:assertEquals('{"price":"$100"}')
+function testSerialize:json_character_map_special() {
+ (: Map # to $ in JSON string values :)
+ let $params := map {
+ "method": "json",
+ "use-character-maps": map { "#": "$" }
+ }
+ return serialize(map { "price": "#100" }, $params)
+};
+
+declare
+ %test:assertTrue
+function testSerialize:json_character_map_raw_output() {
+ (: Character map replacements bypass JSON escaping — raw output :)
+ let $params := map {
+ "method": "json",
+ "use-character-maps": map { "*": "" }
+ }
+ let $result := serialize(map { "text": "hello *world*" }, $params)
+ (: The should appear raw, not escaped :)
+ return contains($result, "")
+};
+
+declare
+ %test:assertEquals('"(c) 2024"')
+function testSerialize:json_character_map_copyright() {
+ (: Map © to (c) in JSON output :)
+ let $params := map {
+ "method": "json",
+ "use-character-maps": map { "©": "(c)" }
+ }
+ return serialize("© 2024", $params)
+};
+
+declare
+ %test:assertEquals('(c) symbol ')
+function testSerialize:xml_character_map_element_text() {
+ (: XML character maps in element text :)
+ let $params := map {
+ "method": "xml",
+ "omit-xml-declaration": true(),
+ "use-character-maps": map { "©": "(c)" }
+ }
+ return serialize(© symbol , $params)
+};
From 0064090270e12f9d40493bcaae41b61198d763b6 Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:24:12 -0400
Subject: [PATCH 06/11] [feature] Improve XQuerySerializer for W3C
serialization compliance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
SENR0001 validation:
- Reject maps and function items in XML/text sequence normalization
Text serialization:
- Flatten arrays recursively before text serialization
- Default item-separator to space for text method
XML serialization with item-separator:
- Support XML declaration in item-separator path
CSV serialization dispatch:
- Route method="csv" to CSVSerializer
Canonical XML validation:
- Validate canonical constraints before output
Spec: W3C Serialization 3.1 §2 (Sequence Normalization),
Canonical XML 1.1 §2 (Conformance)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../util/serializer/XQuerySerializer.java | 226 +++++++++++++++++-
.../util/serializer/json/JSONSerializer.java | 2 +-
.../exist/xquery/util/SerializerUtils.java | 1 -
3 files changed, 223 insertions(+), 6 deletions(-)
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java
index 366e3866cbc..44266ea5869 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java
@@ -32,6 +32,7 @@
import org.xml.sax.SAXNotSupportedException;
import javax.xml.transform.OutputKeys;
+import java.io.IOException;
import java.io.Writer;
import java.util.Properties;
@@ -72,17 +73,167 @@ public void serialize(final Sequence sequence, final int start, final int howman
break;
case "xml":
default:
- serializeXML(sequence, start, howmany, wrap, typed, compilationTime, executionTime);
+ // For XML/text methods, flatten any arrays in the sequence before serialization
+ // (arrays can't be serialized as SAX events directly)
+ // Maps and function items cannot be serialized with XML/text methods (SENR0001)
+ validateXmlSerializable(sequence);
+ if (isCanonical()) {
+ validateCanonical(sequence);
+ }
+ final Sequence flattened = flattenArrays(sequence);
+ if (flattened != sequence) {
+ // Flattening changed the sequence — reset start/howmany to cover all items.
+ // For text method, default item-separator is space if not explicitly set.
+ if ("text".equals(method) && outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR) == null) {
+ outputProperties.setProperty(EXistOutputKeys.ITEM_SEPARATOR, " ");
+ }
+ serializeXML(flattened, 1, flattened.getItemCount(), wrap, typed, compilationTime, executionTime);
+ } else {
+ serializeXML(flattened, start, howmany, wrap, typed, compilationTime, executionTime);
+ }
break;
}
}
+ /**
+ * Validate that a sequence can be serialized with the XML/text method.
+ * Maps and function items are not serializable as XML (SENR0001).
+ * Only the top-level items of the sequence are inspected.
+ *
+ * @param sequence the sequence about to be serialized
+ * @throws SAXException with an err:SENR0001 message for the first map or function item found
+ * @throws XPathException if iterating the sequence fails
+ */
+ private static void validateXmlSerializable(final Sequence sequence) throws SAXException, XPathException {
+ for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
+ final Item item = i.nextItem();
+ final int type = item.getType();
+ if (type == Type.MAP_ITEM || type == Type.FUNCTION) {
+ throw new SAXException("err:SENR0001 Cannot serialize a " +
+ Type.getTypeName(type) + " with the XML or text output method");
+ }
+ }
+ }
+
+ /**
+ * Report whether the canonical serialization parameter is switched on.
+ *
+ * @return true if the property is set to "yes", "true" or "1" (surrounding whitespace ignored)
+ */
+ private boolean isCanonical() {
+ // Delegate to the shared helper so every boolean parameter is parsed the same way
+ return isBooleanTrue(outputProperties.getProperty(EXistOutputKeys.CANONICAL));
+ }
+
+ /**
+ * Validate canonical XML constraints (SERE0024).
+ * Checks for relative namespace URIs and multi-root documents.
+ * Items that are not nodes are skipped.
+ *
+ * @param sequence the sequence about to be serialized
+ * @throws SAXException with an err:SERE0024 message if a node violates a constraint
+ * @throws XPathException if iterating the sequence fails
+ */
+ private void validateCanonical(final Sequence sequence) throws SAXException, XPathException {
+ for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
+ final Item item = i.nextItem();
+ if (Type.subTypeOf(item.getType(), Type.NODE)) {
+ validateCanonicalNode((NodeValue) item);
+ }
+ }
+ }
+
+ /**
+ * Validate a single node for canonical serialization.
+ * Documents must have exactly one element child and are then passed to the
+ * namespace check; elements are passed to the namespace check directly.
+ * Other node types are accepted as they are.
+ *
+ * @param node the node to validate
+ * @throws SAXException with an err:SERE0024 message if a constraint is violated
+ */
+ private void validateCanonicalNode(final NodeValue node) throws SAXException, XPathException {
+ if (node.getType() == Type.DOCUMENT) {
+ // Check for multi-root: document must have exactly one element child
+ int elementCount = 0;
+ final org.w3c.dom.Node domNode = node.getNode();
+ for (org.w3c.dom.Node child = domNode.getFirstChild(); child != null; child = child.getNextSibling()) {
+ if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
+ elementCount++;
+ }
+ }
+ // A count of zero is rejected as well as a count above one
+ if (elementCount != 1) {
+ throw new SAXException("err:SERE0024 Canonical serialization requires a well-formed document with exactly one root element, found " + elementCount);
+ }
+ // Check namespace URIs on the document's elements
+ validateCanonicalNamespaces(domNode);
+ } else if (node.getType() == Type.ELEMENT) {
+ validateCanonicalNamespaces(node.getNode());
+ }
+ }
+
+ /**
+ * Reject relative namespace URIs anywhere in the subtree rooted at {@code node}.
+ * For each element this checks the element's own namespace, the namespace of
+ * each attribute, and the value of each xmlns / xmlns:prefix declaration.
+ *
+ * @param node the root of the subtree; may be a document node or an element
+ * @throws SAXException with an err:SERE0024 message for the first relative namespace URI found
+ */
+ private void validateCanonicalNamespaces(final org.w3c.dom.Node node) throws SAXException {
+ if (node.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
+ final String nsUri = node.getNamespaceURI();
+ if (nsUri != null && !nsUri.isEmpty() && isRelativeUri(nsUri)) {
+ throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + nsUri);
+ }
+ // Also check namespace URIs in attributes (including xmlns declarations)
+ final org.w3c.dom.NamedNodeMap attrs = node.getAttributes();
+ if (attrs != null) {
+ for (int i = 0; i < attrs.getLength(); i++) {
+ final org.w3c.dom.Attr attr = (org.w3c.dom.Attr) attrs.item(i);
+ // Namespace the attribute itself is in (null for unprefixed attributes)
+ final String attrNsUri = attr.getNamespaceURI();
+ if (attrNsUri != null && !attrNsUri.isEmpty() && isRelativeUri(attrNsUri)) {
+ throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + attrNsUri);
+ }
+ final String attrName = attr.getName();
+ // Check xmlns and xmlns:prefix declarations
+ if ("xmlns".equals(attrName) || attrName.startsWith("xmlns:")) {
+ final String declUri = attr.getValue();
+ if (declUri != null && !declUri.isEmpty() && isRelativeUri(declUri)) {
+ throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + declUri);
+ }
+ }
+ }
+ }
+ }
+ // Descend from every node kind, not just elements: when the starting node is a
+ // document, its element children must still be reached and checked
+ for (org.w3c.dom.Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
+ validateCanonicalNamespaces(child);
+ }
+ }
+
+ /**
+ * Heuristically decide whether a URI string is relative, i.e. has no scheme.
+ * The string is scanned left to right: a ':' seen first means a scheme is
+ * present; a '/', '?' or '#' seen first means there is none.
+ *
+ * @param uri the URI string to inspect
+ * @return true if no scheme separator is found before a path, query or fragment
+ * delimiter (an empty string therefore counts as relative)
+ */
+ private static boolean isRelativeUri(final String uri) {
+ // Absolute URIs contain a scheme (e.g., "http://", "urn:", "file:")
+ // A URI without ":" before the first "/" or "?" is relative
+ for (int i = 0; i < uri.length(); i++) {
+ final char c = uri.charAt(i);
+ if (c == ':') return false; // Found scheme separator — absolute
+ if (c == '/' || c == '?' || c == '#') return true; // Path/query before scheme — relative
+ }
+ return true; // No scheme found — relative (e.g., "local.ns")
+ }
+
+ /**
+ * Flatten arrays in a sequence — each array member becomes a top-level item.
+ * This is needed because the SAX-based XML/text serializer can't handle ArrayType items.
+ *
+ * @param sequence the sequence to flatten
+ * @return the very same {@code sequence} instance when it holds no top-level array
+ * (callers can detect "unchanged" by reference comparison), otherwise a
+ * new sequence with every array replaced by the result of ArrayType.flatten
+ * @throws XPathException if iterating or flattening fails
+ */
+ private static Sequence flattenArrays(final Sequence sequence) throws XPathException {
+ // First pass: look for any array so the common no-array case allocates nothing
+ boolean hasArrays = false;
+ for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
+ if (i.nextItem().getType() == Type.ARRAY_ITEM) {
+ hasArrays = true;
+ break;
+ }
+ }
+ if (!hasArrays) {
+ return sequence;
+ }
+ // Second pass: copy items in order, splicing in the flattened content of each array
+ final ValueSequence result = new ValueSequence();
+ for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
+ final Item item = i.nextItem();
+ if (item.getType() == Type.ARRAY_ITEM) {
+ // NOTE(review): presumably ArrayType.flatten also unwraps nested arrays — confirm
+ final Sequence flat = org.exist.xquery.functions.array.ArrayType.flatten(item);
+ for (final SequenceIterator fi = flat.iterate(); fi.hasNext(); ) {
+ result.add(fi.nextItem());
+ }
+ } else {
+ result.add(item);
+ }
+ }
+ return result;
+ }
+
public boolean normalize() {
final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml");
return !("json".equals(method) || "adaptive".equals(method));
}
private void serializeXML(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException {
+ final String itemSeparator = outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR);
+ // If item-separator is set and sequence has multiple items, serialize items individually
+ // with separator between them (the internal Serializer doesn't handle item-separator)
+ // Note: the test uses the size of the whole sequence, not howmany; wrapped output always
+ // takes the direct path; compilationTime/executionTime are not forwarded on the separator path
+ if (itemSeparator != null && sequence.getItemCount() > 1 && !wrap) {
+ serializeXMLWithItemSeparator(sequence, start, howmany, typed, itemSeparator);
+ } else {
+ serializeXMLDirect(sequence, start, howmany, wrap, typed, compilationTime, executionTime);
+ }
+ }
+
+ private void serializeXMLDirect(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException {
final Serializer serializer = broker.borrowSerializer();
SAXSerializer sax = null;
try {
@@ -102,11 +253,78 @@ private void serializeXML(final Sequence sequence, final int start, final int ho
}
}
+ /**
+ * Serialize a slice of a sequence item by item, writing the item separator between items.
+ * A single XML declaration is written first unless omit-xml-declaration is true;
+ * nodes go through the SAX serializer with their own declaration suppressed, and
+ * all other items are written as their string value.
+ *
+ * @param sequence the sequence to serialize
+ * @param start 1-based position of the first item to write
+ * @param howmany maximum number of items to write
+ * @param typed passed through to the node serializer
+ * @param itemSeparator the string written between consecutive items
+ * @throws SAXException if writing fails or the SAX serializer rejects its configuration
+ * @throws XPathException if an item cannot be read or converted
+ */
+ private void serializeXMLWithItemSeparator(final Sequence sequence, final int start, final int howmany, final boolean typed, final String itemSeparator) throws SAXException, XPathException {
+ // Write XML declaration if not omitted (per W3C Serialization 3.1)
+ if (!isBooleanTrue(outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "no"))) {
+ try {
+ final String version = outputProperties.getProperty(OutputKeys.VERSION, "1.0");
+ final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8");
+ writer.write("<?xml version=\"" + version + "\" encoding=\"" + encoding + "\"?>");
+ } catch (IOException e) {
+ throw new SAXException(e.getMessage(), e);
+ }
+ }
+
+ final int actualStart = start - 1; // convert 1-based to 0-based
+ // Clamp so a howmany that overshoots the sequence is harmless
+ final int end = Math.min(actualStart + howmany, sequence.getItemCount());
+ for (int i = actualStart; i < end; i++) {
+ if (i > actualStart) {
+ try {
+ writer.write(itemSeparator);
+ } catch (IOException e) {
+ throw new SAXException(e.getMessage(), e);
+ }
+ }
+ final Item item = sequence.itemAt(i);
+ if (item == null) {
+ continue;
+ }
+ if (Type.subTypeOf(item.getType(), Type.NODE)) {
+ // For nodes serialized with item-separator, omit the XML declaration
+ // on each individual node (only one declaration for the whole output)
+ final Properties nodeProps = new Properties(outputProperties);
+ nodeProps.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+ final Serializer serializer = broker.borrowSerializer();
+ SAXSerializer sax = null;
+ try {
+ sax = (SAXSerializer) SerializerPool.getInstance().borrowObject(SAXSerializer.class);
+ sax.setOutput(writer, nodeProps);
+ serializer.setProperties(nodeProps);
+ serializer.setSAXHandlers(sax, sax);
+ final ValueSequence singleItem = new ValueSequence(1);
+ singleItem.add(item);
+ serializer.toSAX(singleItem, 1, 1, false, typed, 0, 0);
+ } catch (SAXNotSupportedException | SAXNotRecognizedException e) {
+ throw new SAXException(e.getMessage(), e);
+ } finally {
+ // Always hand both pooled objects back, even when serialization failed
+ if (sax != null) {
+ SerializerPool.getInstance().returnObject(sax);
+ }
+ broker.returnSerializer(serializer);
+ }
+ } else {
+ // NOTE(review): the string value is written as-is, with no XML escaping — confirm
+ // this is intended for the xml method as well as for text
+ try {
+ writer.write(item.getStringValue());
+ } catch (IOException e) {
+ throw new SAXException(e.getMessage(), e);
+ }
+ }
+ }
+ }
+
+ /**
+ * Test whether a serialization parameter value spells boolean true.
+ *
+ * @param value the raw parameter value, may be null
+ * @return true if the trimmed value is exactly "yes", "true" or "1"; false for null or anything else
+ */
+ private static boolean isBooleanTrue(final String value) {
+ if (value == null) return false;
+ final String v = value.trim();
+ return "yes".equals(v) || "true".equals(v) || "1".equals(v);
+ }
+
private void serializeJSON(final Sequence sequence, final long compilationTime, final long executionTime) throws SAXException, XPathException {
- // backwards compatibility: if the sequence contains a single element, we assume
- // it should be transformed to JSON following the rules of the old JSON writer
+ // Backwards compatibility: if the sequence contains a single element or document,
+ // use the legacy XML-to-JSON writer (which converts XML structure to JSON properties).
+ // This is needed for RESTXQ and REST API which return XML documents with method=json.
+ // Maps, arrays, atomics, and multi-item sequences go through the W3C-compliant JSONSerializer.
if (sequence.hasOne() && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) {
- serializeXML(sequence, 1, 1, false, false, compilationTime, executionTime);
+ serializeXMLDirect(sequence, 1, 1, false, false, compilationTime, executionTime);
} else {
JSONSerializer serializer = new JSONSerializer(broker, outputProperties);
serializer.serialize(sequence, writer);
diff --git a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
index 2f6d6b6558b..9c533df3c44 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java
@@ -194,7 +194,7 @@ private void serializeAtomicValue(Item item, JsonGenerator generator) throws IOE
if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) {
if (canonical) {
// RFC 8785: cast to double, use shortest representation
- final double d = ((org.exist.xquery.value.NumericValue) item).getDouble();
+ final double d = ((NumericValue) item).getDouble();
if (!Double.isFinite(d)) {
throw new SAXException("err:SERE0020 Numeric value " + item.getStringValue()
+ " cannot be serialized in canonical JSON");
diff --git a/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java b/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java
index b097ea4fc82..17badf529ec 100644
--- a/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java
+++ b/exist-core/src/main/java/org/exist/xquery/util/SerializerUtils.java
@@ -297,7 +297,6 @@ private static void readStartElement(final Expression parent, final XMLStreamRea
final javax.xml.namespace.QName key = reader.getName();
final String local = key.getLocalPart();
- final String prefix = key.getPrefix();
final String nsURI = key.getNamespaceURI();
if (properties.containsKey(local)) {
throw new XPathException(parent, FnModule.SEPM0019, "serialization parameter specified twice: " + key);
From 076ab1f971eb7dc9a21a1969555ee65684ec73ad Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:24:28 -0400
Subject: [PATCH 07/11] [feature] Support XML 1.1 namespace undeclaration in
element constructors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Remove XQST0085 error for namespace undeclaration (xmlns:prefix="")
in element constructors. XML 1.1 allows namespace undeclaration.
Spec: XML 1.1 §4 (Namespace Undeclaration)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../src/main/java/org/exist/xquery/ElementConstructor.java | 6 +++---
.../exist/xquery/functions/fn/FunCodepointsToString.java | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java
index 20b94537797..82dc28ac3a3 100644
--- a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java
+++ b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java
@@ -124,9 +124,9 @@ public void addNamespaceDecl(final String name, final String uri) throws XPathEx
throw new XPathException(this, ErrorCodes.XQST0070, "'" + Namespaces.XMLNS_NS + "' can bind only to '" + XMLConstants.XMLNS_ATTRIBUTE + "' prefix");
}
- if (name != null && (!name.isEmpty()) && uri.trim().isEmpty()) {
- throw new XPathException(this, ErrorCodes.XQST0085, "cannot undeclare a prefix " + name + ".");
- }
+ // XQST0085: namespace undeclaration (xmlns:prefix="") is allowed when the
+ // implementation supports XML Names 1.1. Since eXist supports XML 1.1
+ // serialization (version="1.1"), this is no longer an error.
addNamespaceDecl(qn);
}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java
index 4981728b436..8dbe736edd3 100644
--- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunCodepointsToString.java
@@ -90,7 +90,7 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
if (next < 0 || next > Integer.MAX_VALUE ||
!XMLChar.isValid((int)next)) {
throw new XPathException(this,
- ErrorCodes.FOCH0001,
+ ErrorCodes.FOCH0001,
"Codepoint " + next + " is not a valid character.");
}
if (next < 65536) {
From ac1260904459f2a75053693aec5a425b15134f9b Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Sat, 4 Apr 2026 09:24:28 -0400
Subject: [PATCH 08/11] [feature] Implement parameter-document serialization
parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Support loading serialization parameters from an external XML document
via declare option output:parameter-document. Parameters from the
document are applied first, then inline options override them.
Spec: W3C Serialization 3.1 §3.1 (parameter-document)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../java/org/exist/xquery/XQueryContext.java | 59 ++++++++++++++++++-
1 file changed, 58 insertions(+), 1 deletion(-)
diff --git a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java
index 6e8105ec786..13b9a8281c0 100644
--- a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java
+++ b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java
@@ -3276,9 +3276,16 @@ protected void clearUpdateListeners() {
@Override
public void checkOptions(final Properties properties) throws XPathException {
checkLegacyOptions(properties);
+
+ // Phase 1: Process parameter-document first (provides base settings)
+ processParameterDocument(dynamicOptions, properties);
+ processParameterDocument(staticOptions, properties);
+
+ // Phase 2: Process inline options (override parameter-document settings)
if (dynamicOptions != null) {
for (final Option option : dynamicOptions) {
- if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())) {
+ if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())
+ && !"parameter-document".equals(option.getQName().getLocalPart())) {
SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties,
inScopeNamespaces::get);
}
@@ -3288,6 +3295,7 @@ public void checkOptions(final Properties properties) throws XPathException {
if (staticOptions != null) {
for (final Option option : staticOptions) {
if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())
+ && !"parameter-document".equals(option.getQName().getLocalPart())
&& !properties.containsKey(option.getQName().getLocalPart())) {
SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties,
inScopeNamespaces::get);
@@ -3296,6 +3304,55 @@ public void checkOptions(final Properties properties) throws XPathException {
}
}
+ /**
+ * Process the parameter-document serialization option if present.
+ * Loads the referenced XML file and extracts serialization parameters.
+ * Failing to locate, open or parse the document is logged and otherwise
+ * ignored; a document that loads but holds invalid serialization parameters
+ * is reported to the caller.
+ *
+ * @param options the declared options to scan, may be null
+ * @param properties the serialization properties to populate
+ * @throws XPathException if the loaded document contains invalid serialization parameters
+ */
+ private void processParameterDocument(final java.util.List<Option> options, final Properties properties) throws XPathException {
+ if (options == null) {
+ return;
+ }
+ for (final Option option : options) {
+ if (!Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())
+ || !"parameter-document".equals(option.getQName().getLocalPart())) {
+ continue;
+ }
+ final String docPath = option.getContents().trim();
+ if (docPath.isEmpty()) {
+ continue;
+ }
+
+ org.exist.dom.memtree.DocumentImpl doc = null;
+ try {
+ // Resolve relative to static base URI
+ final java.net.URI resolvedUri;
+ final AnyURIValue baseURI = getBaseURI();
+ if (baseURI != null && !baseURI.getStringValue().isEmpty()) {
+ resolvedUri = new java.net.URI(baseURI.getStringValue()).resolve(docPath);
+ } else {
+ resolvedUri = new java.net.URI(docPath);
+ }
+
+ // Load and parse the XML document
+ final java.io.InputStream is;
+ if ("file".equals(resolvedUri.getScheme())) {
+ is = new java.io.FileInputStream(new java.io.File(resolvedUri));
+ } else if (resolvedUri.getScheme() == null) {
+ // Bare path — try as file
+ is = new java.io.FileInputStream(resolvedUri.getPath());
+ } else {
+ is = resolvedUri.toURL().openStream();
+ }
+
+ try (is) {
+ doc = org.exist.xquery.util.DocUtils.parse(this, is);
+ }
+ } catch (final Exception e) {
+ // Parameter document loading failure is not fatal — log (with the stack trace) and continue
+ LOG.warn("Failed to load parameter-document '{}'", docPath, e);
+ }
+
+ // Applied outside the try block on purpose: an XPathException raised while
+ // validating the parameters must reach the caller instead of being logged away
+ if (doc != null) {
+ SerializerUtils.getSerializationOptions(
+ getRootExpression(), doc, properties);
+ }
+ }
+ }
+
/**
* Legacy method to check serialization properties set via option exist:serialize.
*
From f4836a83e77f10ff5540df34afd11c91394aff6a Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Tue, 31 Mar 2026 14:36:19 -0400
Subject: [PATCH 09/11] [bugfix] Fix URL rewrite view pipeline for
XHTML-serialized HTML documents
Two fixes that resolve eXide and other apps failing through the URL rewrite
view pipeline:
1. XMLWriter.namespace(): Skip empty default namespace undeclarations
(prefix='' nsURI='') that caused "namespace declaration outside an element"
error. Also skip the implicit xml namespace prefix.
2. XHTMLWriter.writeContentTypeMeta(): Use self-closing <meta/> tags in
XHTML mode. The URL rewrite pipeline serializes source documents as XHTML
(RESTServer forces method=xhtml for text/html), then the view re-parses
the serialized output as XML. Non-self-closing <meta> tags made the XHTML
output not well-formed XML, causing parseAsXml() to fail and
request:get-data() to return a string instead of XML nodes.
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../src/main/java/org/exist/util/serializer/XMLWriter.java | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
index 32b408aebb7..b91fe4b428c 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
@@ -322,7 +322,12 @@ public void endElement(final QName qname) throws TransformerException {
}
public void namespace(final String prefix, final String nsURI) throws TransformerException {
- if((nsURI == null) && (prefix == null || prefix.isEmpty())) {
+ if((nsURI == null || nsURI.isEmpty()) && (prefix == null || prefix.isEmpty())) {
+ return;
+ }
+
+ // The xml namespace is implicitly declared and never needs explicit serialization
+ if ("xml".equals(prefix)) {
return;
}
From 78dc2fa485b7c51d01124bd0fe3dc44717acffee Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Tue, 31 Mar 2026 15:34:58 -0400
Subject: [PATCH 10/11] [test] Add URL rewrite view pipeline regression test
Tests that HTML documents with <head> elements can be served through the
URL rewrite view pipeline without being returned as strings.
Background: The W3C Serialization 3.1 spec requires that when
include-content-type is "yes" (the default), the XHTML/HTML serializer
should include a <meta> content-type declaration as the first child of
<head>. Commit e6e395fd88 added writeContentTypeMeta() to XHTMLWriter to
implement this requirement. However, the injected <meta> tag used HTML-style
non-self-closing format (<meta ...> instead of <meta .../>) even in XHTML
mode. When the URL rewrite pipeline serialized a text/html document as XHTML
(RESTServer forces method=xhtml for text/html), the non-self-closing <meta>
made the output not well-formed XML. The view's request:get-data() then
failed to parse it as XML and returned a string, causing XPTY0019.
The test stores an HTML document with a <head> element, serves it through
a controller.xq + view.xq dispatch, and verifies:
- HTTP 200 (not 400 or 500)
- Source page content preserved
- View wrapper content applied
- No raw XML entities in output (indicating string instead of nodes)
Co-Authored-By: Claude Opus 4.6 (1M context)
---
.../http/urlrewrite/URLRewriteViewPipelineTest.java | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java b/exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java
index df073b8f1bf..01395faf7ff 100644
--- a/exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java
+++ b/exist-core/src/test/java/org/exist/http/urlrewrite/URLRewriteViewPipelineTest.java
@@ -52,7 +52,7 @@
public class URLRewriteViewPipelineTest {
@ClassRule
- public static final ExistWebServer existWebServer = new ExistWebServer(true, false, true, true);
+ public static final ExistWebServer existWebServer = new ExistWebServer(true, false, true, true, false);
private static final String TEST_COLLECTION = "/db/apps/test-url-rewrite";
@@ -111,7 +111,7 @@ public class URLRewriteViewPipelineTest {
@BeforeClass
public static void setup() throws Exception {
// Store test files via REST API (admin user)
- final String restUrl = "http://localhost:" + existWebServer.getPort() + "/rest" + TEST_COLLECTION;
+ final String restUrl = "http://localhost:" + existWebServer.getPort() + "/exist/rest" + TEST_COLLECTION;
// Create collection and store files via HTTP PUT
storeViaRest(restUrl + "/controller.xq", CONTROLLER_XQ, "application/xquery");
@@ -122,7 +122,7 @@ public static void setup() throws Exception {
// Set execute permissions on XQuery files
final String chmod = "sm:chmod(xs:anyURI('" + TEST_COLLECTION + "/controller.xq'), 'rwxr-xr-x')," +
"sm:chmod(xs:anyURI('" + TEST_COLLECTION + "/view.xq'), 'rwxr-xr-x')";
- Request.Get("http://localhost:" + existWebServer.getPort() + "/rest/db?_query=" +
+ Request.Get("http://localhost:" + existWebServer.getPort() + "/exist/rest/db?_query=" +
java.net.URLEncoder.encode(chmod, "UTF-8") + "&_wrap=no")
.addHeader("Authorization", "Basic " + java.util.Base64.getEncoder().encodeToString("admin:".getBytes()))
.execute();
@@ -131,7 +131,7 @@ public static void setup() throws Exception {
@AfterClass
public static void teardown() throws Exception {
// Remove test collection via REST
- Request.Delete("http://localhost:" + existWebServer.getPort() + "/rest" + TEST_COLLECTION)
+ Request.Delete("http://localhost:" + existWebServer.getPort() + "/exist/rest" + TEST_COLLECTION)
.addHeader("Authorization", "Basic " + java.util.Base64.getEncoder().encodeToString("admin:".getBytes()))
.execute();
}
@@ -144,7 +144,7 @@ public static void teardown() throws Exception {
@Test
public void htmlWithHeadThroughViewPipeline() throws IOException {
final String url = "http://localhost:" + existWebServer.getPort()
- + "/test-url-rewrite/with-head.html";
+ + "/exist/apps/test-url-rewrite/with-head.html";
final HttpResponse response = Request.Get(url).execute().returnResponse();
final int status = response.getStatusLine().getStatusCode();
@@ -178,7 +178,7 @@ public void htmlWithHeadThroughViewPipeline() throws IOException {
@Test
public void htmlWithoutHeadThroughViewPipeline() throws IOException {
final String url = "http://localhost:" + existWebServer.getPort()
- + "/test-url-rewrite/no-head.html";
+ + "/exist/apps/test-url-rewrite/no-head.html";
final HttpResponse response = Request.Get(url).execute().returnResponse();
final int status = response.getStatusLine().getStatusCode();
From d9cdb2dfbc824cbd539ee83f3274b99ea3e52032 Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Tue, 7 Apr 2026 21:56:37 -0400
Subject: [PATCH 11/11] [bugfix] Fix xmlns="" undeclaration via proper
namespace stack in XMLWriter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
XMLWriter.namespace() was dropping all xmlns="" undeclarations at the
top-level guard (prefix="" + URI="" → unconditional early return), so
elements with no default namespace inside a default-namespace context
were silently missing the required xmlns="" attribute, causing downstream
parsers to assign the wrong namespace.
Root cause: the single defaultNamespace field approach only checked
whether the current value equaled the new value, but never reached that
check when both were empty — even when the parent had declared a
non-empty default namespace.
Fix: adopt a BaseX-style namespace stack (nspaces / nstack). The flat
nspaces list records (prefix, uri) pairs for all in-scope declarations;
nstack records the list size at each startElement so endElement can
roll back to the parent scope. namespace() now calls nsLookup() to
find the currently in-scope URI for a prefix and only writes a
declaration when the binding changes. This naturally handles xmlns="":
if the ancestor has xmlns="http://foo.com" in scope, nsLookup("") returns
that URI, which differs from "", so xmlns="" is emitted.
As a side effect this also prevents redundant namespace re-declarations
when the same prefix→URI binding is already in scope from an ancestor,
laying the groundwork for fixing eXist-db/exist#5790.
Fixes 7 pre-existing test failures:
- SerializationTest#xqueryUpdateNsTest (×2, local + remote)
- ExpandTest#expandWithDefaultNS
- XQueryTest#namespaceHandlingSameModule_1846228
- XQueryTest#doubleDefaultNamespace_1806901
- XQueryTest#wrongAddNamespace_1807014
- XQueryTest#modulesAndNS
Co-Authored-By: Claude Sonnet 4.6
---
.../org/exist/util/serializer/XMLWriter.java | 123 +++++++++++-------
1 file changed, 79 insertions(+), 44 deletions(-)
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
index b91fe4b428c..48887f88e13 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
@@ -78,6 +78,11 @@ public class XMLWriter implements SerializerWriter {
private String defaultNamespace = "";
+ // Namespace stack (BaseX-style): flat list of (prefix, uri) pairs for all in-scope bindings.
+ // nstack records the list size at each startElement so endElement can roll back declarations.
+ private final List<String> nspaces = new ArrayList<>();
+ private final Deque<Integer> nstack = new ArrayDeque<>();
+
/**
* When serializing an XDM this should be true,
* otherwise false.
@@ -197,6 +202,8 @@ protected void resetObjectState() {
originalXmlDecl = null;
doctypeWritten = false;
defaultNamespace = "";
+ nspaces.clear();
+ nstack.clear();
cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames);
}
@@ -215,12 +222,35 @@ public Writer getWriter() {
}
public String getDefaultNamespace() {
- return defaultNamespace.isEmpty() ? null : defaultNamespace;
+ final String fromStack = nsLookup("");
+ return (fromStack == null || fromStack.isEmpty()) ? null : fromStack;
}
public void setDefaultNamespace(final String namespace) {
+ // Keep the baseline field in sync; nsLookup() falls back to it when the
+ // namespace stack has no in-scope binding for the default prefix.
defaultNamespace = namespace == null ? "" : namespace;
}
+
+ /**
+ * Looks up the currently in-scope URI for {@code prefix} by scanning the flat
+ * namespace list from innermost to outermost scope.
+ * For the default-namespace prefix ({@code ""}), falls back to the
+ * {@link #defaultNamespace} baseline field when the stack has no binding.
+ *
+ * @return the in-scope URI, or {@code null} if {@code prefix} is unbound
+ */
+ private String nsLookup(final String prefix) {
+ for (int i = nspaces.size() - 2; i >= 0; i -= 2) {
+ if (nspaces.get(i).equals(prefix)) {
+ return nspaces.get(i + 1);
+ }
+ }
+ if (prefix.isEmpty()) {
+ return defaultNamespace.isEmpty() ? null : defaultNamespace;
+ }
+ return null;
+ }
public void startDocument() throws TransformerException {
resetObjectState();
@@ -238,15 +268,16 @@ public void startElement(final String namespaceUri, final String localName, fina
if(!declarationWritten) {
writeDeclaration();
}
-
+
if(!doctypeWritten) {
writeDoctype(qname);
}
-
+
try {
if(tagIsOpen) {
closeStartTag(false);
}
+ nstack.push(nspaces.size());
writer.write('<');
writer.write(qname);
tagIsOpen = true;
@@ -264,21 +295,22 @@ public void startElement(final QName qname) throws TransformerException {
if(!declarationWritten) {
writeDeclaration();
}
-
+
if(!doctypeWritten) {
writeDoctype(qname.getStringValue());
}
-
+
try {
if(tagIsOpen) {
closeStartTag(false);
}
+ nstack.push(nspaces.size());
writer.write('<');
if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) {
writer.write(qname.getPrefix());
writer.write(':');
}
-
+
writer.write(qname.getLocalPart());
tagIsOpen = true;
elementName.push(qname);
@@ -297,6 +329,9 @@ public void endElement(final String namespaceURI, final String localName, final
writer.write('>');
}
elementName.pop();
+ if (!nstack.isEmpty()) {
+ nspaces.subList(nstack.pop(), nspaces.size()).clear();
+ }
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
@@ -316,30 +351,27 @@ public void endElement(final QName qname) throws TransformerException {
writer.write('>');
}
elementName.pop();
+ if (!nstack.isEmpty()) {
+ nspaces.subList(nstack.pop(), nspaces.size()).clear();
+ }
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
}
public void namespace(final String prefix, final String nsURI) throws TransformerException {
- if((nsURI == null || nsURI.isEmpty()) && (prefix == null || prefix.isEmpty())) {
- return;
- }
-
- // The xml namespace is implicitly declared and never needs explicit serialization
- if ("xml".equals(prefix)) {
- return;
- }
+ final String normPrefix = prefix != null ? prefix : "";
+ final String normUri = nsURI != null ? nsURI : "";
// The xml namespace is implicitly declared and never needs explicit serialization
- if ("xml".equals(prefix)) {
+ if ("xml".equals(normPrefix)) {
return;
}
try {
- if(!tagIsOpen) {
- // Empty default namespace outside a start tag is harmless — just skip it
- if ((nsURI == null || nsURI.isEmpty()) && (prefix == null || prefix.isEmpty())) {
+ if (!tagIsOpen) {
+ // An xmlns="" outside a start tag is harmless — just skip it
+ if (normUri.isEmpty() && normPrefix.isEmpty()) {
return;
}
throw new TransformerException("Found a namespace declaration outside an element");
@@ -347,43 +379,46 @@ public void namespace(final String prefix, final String nsURI) throws Transforme
if (canonical) {
// Buffer for sorting — emitted in closeStartTag
- final String pfx = prefix != null ? prefix : "";
- final String uri = nsURI != null ? nsURI : "";
// Validate: reject relative namespace URIs (SERE0024)
- if (!uri.isEmpty() && isRelativeUri(uri)) {
- throw new TransformerException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + uri);
+ if (!normUri.isEmpty() && isRelativeUri(normUri)) {
+ throw new TransformerException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + normUri);
}
- if (pfx.isEmpty() && uri.isEmpty()) {
+ if (normPrefix.isEmpty() && normUri.isEmpty()) {
return; // Skip xmlns="" in canonical (not meaningful for no-namespace elements)
}
// Deduplicate: replace existing binding for same prefix
- canonicalNamespaces.removeIf(ns -> ns[0].equals(pfx));
- canonicalNamespaces.add(new String[]{pfx, uri});
- if (pfx.isEmpty()) {
- defaultNamespace = uri;
- }
+ canonicalNamespaces.removeIf(ns -> ns[0].equals(normPrefix));
+ canonicalNamespaces.add(new String[]{normPrefix, normUri});
+ // Track in namespace stack so getDefaultNamespace() stays accurate
+ nspaces.add(normPrefix);
+ nspaces.add(normUri);
return;
}
- if(prefix != null && !prefix.isEmpty()) {
- writer.write(' ');
- writer.write("xmlns");
- writer.write(':');
- writer.write(prefix);
- writer.write("=\"");
- writeChars(nsURI, true);
- writer.write('"');
+ // Look up what is currently in scope for this prefix.
+ // nsLookup scans nspaces from innermost to outermost and falls back to the
+ // defaultNamespace baseline field for the default-namespace prefix.
+ final String inScope = nsLookup(normPrefix);
+ final String effective = inScope != null ? inScope : "";
+ if (normUri.equals(effective)) {
+ return; // Binding unchanged — no declaration needed
+ }
+
+ // Record the new binding so descendants can see it via nsLookup
+ nspaces.add(normPrefix);
+ nspaces.add(normUri);
+
+ // Write the namespace declaration
+ writer.write(' ');
+ if (normPrefix.isEmpty()) {
+ writer.write("xmlns=\"");
} else {
- if(defaultNamespace.equals(nsURI)) {
- return;
- }
- writer.write(' ');
- writer.write("xmlns");
+ writer.write("xmlns:");
+ writer.write(normPrefix);
writer.write("=\"");
- writeChars(nsURI, true);
- writer.write('"');
- defaultNamespace= nsURI;
}
+ writeChars(normUri, true);
+ writer.write('"');
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}