Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ work/

# Claude planning files
plans/
.xqts-runner/
11 changes: 11 additions & 0 deletions exist-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,12 @@
</exclusions>
</dependency>

<dependency>
<groupId>nu.validator</groupId>
<artifactId>htmlparser</artifactId>
<version>1.4.16</version>
</dependency>

<dependency>
<groupId>org.apache.ws.commons.util</groupId>
<artifactId>ws-commons-util</artifactId>
Expand Down Expand Up @@ -390,6 +396,11 @@
<artifactId>Saxon-HE</artifactId>
</dependency>

<dependency>
<groupId>de.bottlecaps</groupId>
<artifactId>markup-blitz</artifactId>
</dependency>

<dependency>
<groupId>org.exist-db</groupId>
<artifactId>exist-saxon-regex</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,16 @@ throws PermissionDeniedException, EXistException, XPathException
v:VERSION_DECL
{
final String version = v.getText();
if (version.equals("3.1")) {
if (version.equals("4.0")) {
context.setXQueryVersion(40);
} else if (version.equals("3.1")) {
context.setXQueryVersion(31);
} else if (version.equals("3.0")) {
context.setXQueryVersion(30);
} else if (version.equals("1.0")) {
context.setXQueryVersion(10);
} else {
throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0 or 3.1");
throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0, 3.1, or 4.0");
}
}
( enc:STRING_LITERAL )?
Expand Down
159 changes: 144 additions & 15 deletions exist-core/src/main/java/org/exist/util/Collations.java
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,24 @@
*/
public static int compare(@Nullable final Collator collator, final String s1,final String s2) {
if (collator == null) {
return s1 == null ? (s2 == null ? 0 : -1) : s1.compareTo(s2);
if (s1 == null) {
return s2 == null ? 0 : -1;
}
// Compare by Unicode codepoints, not UTF-16 code units.
// String.compareTo() compares char (UTF-16) values, which gives wrong
// ordering for supplementary characters (U+10000+) encoded as surrogate pairs.
int i1 = 0, i2 = 0;

Check notice on line 355 in exist-core/src/main/java/org/exist/util/Collations.java

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

exist-core/src/main/java/org/exist/util/Collations.java#L355

Use one line for each declaration, it enhances code readability.
while (i1 < s1.length() && i2 < s2.length()) {
final int cp1 = s1.codePointAt(i1);
final int cp2 = s2.codePointAt(i2);
if (cp1 != cp2) {
return cp1 - cp2;
}
i1 += Character.charCount(cp1);
i2 += Character.charCount(cp2);
}
// Shorter string is less; equal length means equal
return (s1.length() - i1) - (s2.length() - i2);
} else {
return collator.compare(s1, s2);
}
Expand All @@ -371,10 +388,16 @@
return true;
} else if (s1.isEmpty()) {
return false;
} else {
} else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
new StringSearch(s2, new StringCharacterIterator(s1), rbc);
return searchIterator.first() == 0;
} else {
// Fallback for non-RuleBasedCollator (e.g., HtmlAsciiCaseInsensitiveCollator)
if (s1.length() >= s2.length()) {
return collator.compare(s1.substring(0, s2.length()), s2) == 0;
}
return false;
}
}
}
Expand All @@ -398,9 +421,9 @@
return true;
} else if (s1.isEmpty()) {
return false;
} else {
} else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
new StringSearch(s2, new StringCharacterIterator(s1), rbc);
int lastPos = SearchIterator.DONE;
int lastLen = 0;
for (int pos = searchIterator.first(); pos != SearchIterator.DONE;
Expand All @@ -410,6 +433,12 @@
}

return lastPos > SearchIterator.DONE && lastPos + lastLen == s1.length();
} else {
// Fallback for non-RuleBasedCollator
if (s1.length() >= s2.length()) {
return collator.compare(s1.substring(s1.length() - s2.length()), s2) == 0;
}
return false;
}
}
}
Expand All @@ -433,10 +462,18 @@
return true;
} else if (s1.isEmpty()) {
return false;
} else {
} else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
new StringSearch(s2, new StringCharacterIterator(s1), rbc);
return searchIterator.first() >= 0;
} else {
// Fallback for non-RuleBasedCollator
for (int i = 0; i <= s1.length() - s2.length(); i++) {
if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) {
return true;
}
}
return false;
}
}
}
Expand All @@ -459,10 +496,18 @@
return 0;
} else if (s1.isEmpty()) {
return -1;
} else {
} else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
new StringSearch(s2, new StringCharacterIterator(s1), rbc);
return searchIterator.first();
} else {
// Fallback for non-RuleBasedCollator
for (int i = 0; i <= s1.length() - s2.length(); i++) {
if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) {
return i;
}
}
return -1;
}
}
}
Expand Down Expand Up @@ -809,21 +854,105 @@
return collator;
}

private static Collator getHtmlAsciiCaseInsensitiveCollator() throws Exception {
private static Collator getHtmlAsciiCaseInsensitiveCollator() {
Collator collator = htmlAsciiCaseInsensitiveCollator.get();
if (collator == null) {
collator = new RuleBasedCollator("&a=A &b=B &c=C &d=D &e=E &f=F &g=G &h=H "
+ "&i=I &j=J &k=K &l=L &m=M &n=N &o=O &p=P &q=Q &r=R &s=S &t=T "
+ "&u=U &v=V &w=W &x=X &y=Y &z=Z");
collator.setStrength(Collator.PRIMARY);
// XQ4 html-ascii-case-insensitive: ASCII letters A-Z fold to a-z,
// all other characters compare by Unicode codepoint order.
// Cannot use RuleBasedCollator with PRIMARY strength because that
// makes ALL case/accent differences irrelevant, not just ASCII.
htmlAsciiCaseInsensitiveCollator.compareAndSet(null,
collator.freeze());
new HtmlAsciiCaseInsensitiveCollator());
collator = htmlAsciiCaseInsensitiveCollator.get();
}

return collator;
}

/**
* Custom Collator for HTML ASCII case-insensitive comparison.
* Folds only ASCII letters A-Z to a-z, then compares by Unicode codepoint.
* Non-ASCII characters are compared by their codepoint value without folding.
*/
private static final class HtmlAsciiCaseInsensitiveCollator extends Collator {

@Override
public int compare(final String source, final String target) {
int i1 = 0, i2 = 0;
while (i1 < source.length() && i2 < target.length()) {
int cp1 = source.codePointAt(i1);
int cp2 = target.codePointAt(i2);
// Fold ASCII uppercase to lowercase only
if (cp1 >= 'A' && cp1 <= 'Z') {
cp1 += 32;
}
if (cp2 >= 'A' && cp2 <= 'Z') {
cp2 += 32;
}
if (cp1 != cp2) {
return cp1 - cp2;
}
i1 += Character.charCount(cp1);
i2 += Character.charCount(cp2);
}
return (source.length() - i1) - (target.length() - i2);
}

@Override
public CollationKey getCollationKey(final String source) {
throw new UnsupportedOperationException("CollationKey not supported for HTML ASCII case-insensitive collation");
}

@Override
public RawCollationKey getRawCollationKey(final String source, final RawCollationKey key) {
throw new UnsupportedOperationException("RawCollationKey not supported for HTML ASCII case-insensitive collation");
}

@Override
public int setVariableTop(final String varTop) {
return 0;
}

@Override
public int getVariableTop() {
return 0;
}

@Override
public void setVariableTop(final int varTop) {

Check notice on line 922 in exist-core/src/main/java/org/exist/util/Collations.java

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

exist-core/src/main/java/org/exist/util/Collations.java#L922

Document empty method body
}

@Override
public VersionInfo getVersion() {
return VersionInfo.getInstance(1);
}

@Override
public VersionInfo getUCAVersion() {
return VersionInfo.getInstance(1);
}

@Override
public int hashCode() {
return HtmlAsciiCaseInsensitiveCollator.class.hashCode();
}

@Override
public Collator freeze() {
return this;
}

@Override
public boolean isFrozen() {
return true;
}

@Override
public Collator cloneAsThawed() {
return new HtmlAsciiCaseInsensitiveCollator();
}
}

private static Collator getXqtsAsciiCaseBlindCollator() throws Exception {
Collator collator = xqtsAsciiCaseBlindCollator.get();
if (collator == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,25 @@ private void serializeXML(final Sequence sequence, final int start, final int ho
}

private void serializeJSON(final Sequence sequence, final long compilationTime, final long executionTime) throws SAXException, XPathException {
// backwards compatibility: if the sequence contains a single element, we assume
// it should be transformed to JSON following the rules of the old JSON writer
if (sequence.hasOne() && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) {
// XDM serialization: use JSONSerializer for maps and arrays (W3C JSON output method).
// For element/document nodes, use the legacy XML-to-JSON conversion path for
// backward compatibility with eXist's traditional JSON serialization.
// TODO (eXist 8.0): Remove legacy XML-to-JSON conversion.
// The legacy path is deprecated in 7.0 — use fn:serialize($map, map{"method":"json"}) instead.
final boolean isXdmMapOrArray = sequence.hasOne()
&& (sequence.getItemType() == Type.MAP_ITEM || sequence.getItemType() == Type.ARRAY_ITEM);

if (isXdmMapOrArray || (!sequence.hasOne())
|| Type.subTypeOfUnion(sequence.getItemType(), Type.ANY_ATOMIC_TYPE)) {
// Maps, arrays, sequences, and atomic values: use W3C JSONSerializer
final JSONSerializer serializer = new JSONSerializer(broker, outputProperties);
serializer.serialize(sequence, writer);
} else if (sequence.hasOne()
&& (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) {
// Legacy path: single element/document → XML-to-JSON conversion
serializeXML(sequence, 1, 1, false, false, compilationTime, executionTime);
} else {
JSONSerializer serializer = new JSONSerializer(broker, outputProperties);
final JSONSerializer serializer = new JSONSerializer(broker, outputProperties);
serializer.serialize(sequence, writer);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ public void serialize(Sequence sequence, Writer writer) throws SAXException {
if ("yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"))) {
generator.useDefaultPrettyPrinter();
}
if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"))) {
// allow-duplicate-names=no (default per W3C) → enable strict detection
// allow-duplicate-names=yes → disable strict detection (allow duplicates)
if ("no".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "no"))) {
generator.enable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
} else {
generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void addValue(String value) {

public void addEnclosedExpr(Expression expr) throws XPathException {
if(isNamespaceDecl)
{throw new XPathException(this, "enclosed expressions are not allowed in namespace " +
{throw new XPathException(this, ErrorCodes.XQST0022, "enclosed expressions are not allowed in namespace " +
"declaration attributes");}
contents.add(expr);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
}
}

// Should be handled by the parser
if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) {
// XPST0080: cannot cast to abstract or special types
if (requiredType == Type.ANY_ATOMIC_TYPE || requiredType == Type.ANY_SIMPLE_TYPE
|| (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) {
throw new XPathException(this, ErrorCodes.XPST0080, "cannot cast to " + Type.getTypeName(requiredType));
}

if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) {
if (expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) {
throw new XPathException(this, ErrorCodes.XPST0051, "cannot cast to " + Type.getTypeName(requiredType));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,11 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc
{context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());}
}

if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION))
if (requiredType == Type.ANY_ATOMIC_TYPE || requiredType == Type.ANY_SIMPLE_TYPE
|| (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION))
{throw new XPathException(this, ErrorCodes.XPST0080, "cannot convert to " + Type.getTypeName(requiredType));}

if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED)
if (expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED)
{throw new XPathException(this, ErrorCodes.XPST0051, "cannot convert to " + Type.getTypeName(requiredType));}

Sequence result;
Expand Down
Loading
Loading