diff --git a/.gitignore b/.gitignore index 0a677773713..8ba285c898a 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ work/ # Claude planning files plans/ +.xqts-runner/ diff --git a/exist-core/pom.xml b/exist-core/pom.xml index 0a150cb21c0..c34dce2c684 100644 --- a/exist-core/pom.xml +++ b/exist-core/pom.xml @@ -323,6 +323,12 @@ + + nu.validator + htmlparser + 1.4.16 + + org.apache.ws.commons.util ws-commons-util @@ -390,6 +396,11 @@ Saxon-HE + + de.bottlecaps + markup-blitz + + org.exist-db exist-saxon-regex diff --git a/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g b/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g index 20308296806..399fa264dc6 100644 --- a/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g +++ b/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g @@ -267,14 +267,16 @@ throws PermissionDeniedException, EXistException, XPathException v:VERSION_DECL { final String version = v.getText(); - if (version.equals("3.1")) { + if (version.equals("4.0")) { + context.setXQueryVersion(40); + } else if (version.equals("3.1")) { context.setXQueryVersion(31); } else if (version.equals("3.0")) { context.setXQueryVersion(30); } else if (version.equals("1.0")) { context.setXQueryVersion(10); } else { - throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0 or 3.1"); + throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0, 3.1, or 4.0"); } } ( enc:STRING_LITERAL )? 
diff --git a/exist-core/src/main/java/org/exist/util/Collations.java b/exist-core/src/main/java/org/exist/util/Collations.java index 2d03138a291..ecb09f43cbe 100644 --- a/exist-core/src/main/java/org/exist/util/Collations.java +++ b/exist-core/src/main/java/org/exist/util/Collations.java @@ -346,7 +346,24 @@ public static boolean equals(@Nullable final Collator collator, final String s1, */ public static int compare(@Nullable final Collator collator, final String s1,final String s2) { if (collator == null) { - return s1 == null ? (s2 == null ? 0 : -1) : s1.compareTo(s2); + if (s1 == null) { + return s2 == null ? 0 : -1; + } + // Compare by Unicode codepoints, not UTF-16 code units. + // String.compareTo() compares char (UTF-16) values, which gives wrong + // ordering for supplementary characters (U+10000+) encoded as surrogate pairs. + int i1 = 0, i2 = 0; + while (i1 < s1.length() && i2 < s2.length()) { + final int cp1 = s1.codePointAt(i1); + final int cp2 = s2.codePointAt(i2); + if (cp1 != cp2) { + return cp1 - cp2; + } + i1 += Character.charCount(cp1); + i2 += Character.charCount(cp2); + } + // Shorter string is less; equal length means equal + return (s1.length() - i1) - (s2.length() - i2); } else { return collator.compare(s1, s2); } @@ -371,10 +388,16 @@ public static boolean startsWith(@Nullable final Collator collator, final String return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first() == 0; + } else { + // Fallback for non-RuleBasedCollator (e.g., HtmlAsciiCaseInsensitiveCollator) + if (s1.length() >= s2.length()) { + return collator.compare(s1.substring(0, s2.length()), s2) == 0; + } + return false; } } } @@ -398,9 +421,9 @@ public static boolean endsWith(@Nullable final 
Collator collator, final String s return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); int lastPos = SearchIterator.DONE; int lastLen = 0; for (int pos = searchIterator.first(); pos != SearchIterator.DONE; @@ -410,6 +433,12 @@ public static boolean endsWith(@Nullable final Collator collator, final String s } return lastPos > SearchIterator.DONE && lastPos + lastLen == s1.length(); + } else { + // Fallback for non-RuleBasedCollator + if (s1.length() >= s2.length()) { + return collator.compare(s1.substring(s1.length() - s2.length()), s2) == 0; + } + return false; } } } @@ -433,10 +462,18 @@ public static boolean contains(@Nullable final Collator collator, final String s return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first() >= 0; + } else { + // Fallback for non-RuleBasedCollator + for (int i = 0; i <= s1.length() - s2.length(); i++) { + if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) { + return true; + } + } + return false; } } } @@ -459,10 +496,18 @@ public static int indexOf(@Nullable final Collator collator, final String s1, fi return 0; } else if (s1.isEmpty()) { return -1; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first(); + } else { + // Fallback for non-RuleBasedCollator + for 
(int i = 0; i <= s1.length() - s2.length(); i++) { + if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) { + return i; + } + } + return -1; } } } @@ -809,21 +854,105 @@ private static Collator getSamiskCollator() throws Exception { return collator; } - private static Collator getHtmlAsciiCaseInsensitiveCollator() throws Exception { + private static Collator getHtmlAsciiCaseInsensitiveCollator() { Collator collator = htmlAsciiCaseInsensitiveCollator.get(); if (collator == null) { - collator = new RuleBasedCollator("&a=A &b=B &c=C &d=D &e=E &f=F &g=G &h=H " - + "&i=I &j=J &k=K &l=L &m=M &n=N &o=O &p=P &q=Q &r=R &s=S &t=T " - + "&u=U &v=V &w=W &x=X &y=Y &z=Z"); - collator.setStrength(Collator.PRIMARY); + // XQ4 html-ascii-case-insensitive: ASCII letters A-Z fold to a-z, + // all other characters compare by Unicode codepoint order. + // Cannot use RuleBasedCollator with PRIMARY strength because that + // makes ALL case/accent differences irrelevant, not just ASCII. htmlAsciiCaseInsensitiveCollator.compareAndSet(null, - collator.freeze()); + new HtmlAsciiCaseInsensitiveCollator()); collator = htmlAsciiCaseInsensitiveCollator.get(); } return collator; } + /** + * Custom Collator for HTML ASCII case-insensitive comparison. + * Folds only ASCII letters A-Z to a-z, then compares by Unicode codepoint. + * Non-ASCII characters are compared by their codepoint value without folding. 
+ */ + private static final class HtmlAsciiCaseInsensitiveCollator extends Collator { + + @Override + public int compare(final String source, final String target) { + int i1 = 0, i2 = 0; + while (i1 < source.length() && i2 < target.length()) { + int cp1 = source.codePointAt(i1); + int cp2 = target.codePointAt(i2); + // Fold ASCII uppercase to lowercase only + if (cp1 >= 'A' && cp1 <= 'Z') { + cp1 += 32; + } + if (cp2 >= 'A' && cp2 <= 'Z') { + cp2 += 32; + } + if (cp1 != cp2) { + return cp1 - cp2; + } + i1 += Character.charCount(cp1); + i2 += Character.charCount(cp2); + } + return (source.length() - i1) - (target.length() - i2); + } + + @Override + public CollationKey getCollationKey(final String source) { + throw new UnsupportedOperationException("CollationKey not supported for HTML ASCII case-insensitive collation"); + } + + @Override + public RawCollationKey getRawCollationKey(final String source, final RawCollationKey key) { + throw new UnsupportedOperationException("RawCollationKey not supported for HTML ASCII case-insensitive collation"); + } + + @Override + public int setVariableTop(final String varTop) { + return 0; + } + + @Override + public int getVariableTop() { + return 0; + } + + @Override + public void setVariableTop(final int varTop) { + } + + @Override + public VersionInfo getVersion() { + return VersionInfo.getInstance(1); + } + + @Override + public VersionInfo getUCAVersion() { + return VersionInfo.getInstance(1); + } + + @Override + public int hashCode() { + return HtmlAsciiCaseInsensitiveCollator.class.hashCode(); + } + + @Override + public Collator freeze() { + return this; + } + + @Override + public boolean isFrozen() { + return true; + } + + @Override + public Collator cloneAsThawed() { + return new HtmlAsciiCaseInsensitiveCollator(); + } + } + private static Collator getXqtsAsciiCaseBlindCollator() throws Exception { Collator collator = xqtsAsciiCaseBlindCollator.get(); if (collator == null) { diff --git 
a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java index 366e3866cbc..acfb8d16de6 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java @@ -103,12 +103,25 @@ private void serializeXML(final Sequence sequence, final int start, final int ho } private void serializeJSON(final Sequence sequence, final long compilationTime, final long executionTime) throws SAXException, XPathException { - // backwards compatibility: if the sequence contains a single element, we assume - // it should be transformed to JSON following the rules of the old JSON writer - if (sequence.hasOne() && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) { + // XDM serialization: use JSONSerializer for maps and arrays (W3C JSON output method). + // For element/document nodes, use the legacy XML-to-JSON conversion path for + // backward compatibility with eXist's traditional JSON serialization. + // TODO (eXist 8.0): Remove legacy XML-to-JSON conversion. + // The legacy path is deprecated in 7.0 — use fn:serialize($map, map{"method":"json"}) instead. 
+ final boolean isXdmMapOrArray = sequence.hasOne() + && (sequence.getItemType() == Type.MAP_ITEM || sequence.getItemType() == Type.ARRAY_ITEM); + + if (isXdmMapOrArray || (!sequence.hasOne()) + || Type.subTypeOfUnion(sequence.getItemType(), Type.ANY_ATOMIC_TYPE)) { + // Maps, arrays, sequences, and atomic values: use W3C JSONSerializer + final JSONSerializer serializer = new JSONSerializer(broker, outputProperties); + serializer.serialize(sequence, writer); + } else if (sequence.hasOne() + && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) { + // Legacy path: single element/document → XML-to-JSON conversion serializeXML(sequence, 1, 1, false, false, compilationTime, executionTime); } else { - JSONSerializer serializer = new JSONSerializer(broker, outputProperties); + final JSONSerializer serializer = new JSONSerializer(broker, outputProperties); serializer.serialize(sequence, writer); } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java index bd1f01a9454..7728633368a 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java @@ -64,7 +64,9 @@ public void serialize(Sequence sequence, Writer writer) throws SAXException { if ("yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"))) { generator.useDefaultPrettyPrinter(); } - if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"))) { + // allow-duplicate-names=no (default per W3C) → enable strict detection + // allow-duplicate-names=yes → disable strict detection (allow duplicates) + if ("no".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "no"))) { generator.enable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); } else { 
generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); diff --git a/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java b/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java index bb1720e67d9..2dcbfe4652c 100644 --- a/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java @@ -56,7 +56,7 @@ public void addValue(String value) { public void addEnclosedExpr(Expression expr) throws XPathException { if(isNamespaceDecl) - {throw new XPathException(this, "enclosed expressions are not allowed in namespace " + + {throw new XPathException(this, ErrorCodes.XQST0022, "enclosed expressions are not allowed in namespace " + "declaration attributes");} contents.add(expr); } diff --git a/exist-core/src/main/java/org/exist/xquery/CastExpression.java b/exist-core/src/main/java/org/exist/xquery/CastExpression.java index 8911c5c6144..a2453eaaddc 100644 --- a/exist-core/src/main/java/org/exist/xquery/CastExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/CastExpression.java @@ -84,12 +84,13 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr } } - // Should be handled by the parser - if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) { + // XPST0080: cannot cast to abstract or special types + if (requiredType == Type.ANY_ATOMIC_TYPE || requiredType == Type.ANY_SIMPLE_TYPE + || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) { throw new XPathException(this, ErrorCodes.XPST0080, "cannot cast to " + Type.getTypeName(requiredType)); } - if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) { + if (expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() 
== Type.UNTYPED) { throw new XPathException(this, ErrorCodes.XPST0051, "cannot cast to " + Type.getTypeName(requiredType)); } diff --git a/exist-core/src/main/java/org/exist/xquery/CastableExpression.java b/exist-core/src/main/java/org/exist/xquery/CastableExpression.java index 9a0769f9653..e923da9fe08 100644 --- a/exist-core/src/main/java/org/exist/xquery/CastableExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/CastableExpression.java @@ -93,10 +93,11 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());} } - if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) + if (requiredType == Type.ANY_ATOMIC_TYPE || requiredType == Type.ANY_SIMPLE_TYPE + || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) {throw new XPathException(this, ErrorCodes.XPST0080, "cannot convert to " + Type.getTypeName(requiredType));} - if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) + if (expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) {throw new XPathException(this, ErrorCodes.XPST0051, "cannot convert to " + Type.getTypeName(requiredType));} Sequence result; diff --git a/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java b/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java index 46a54962ad5..93c4d4b1fff 100644 --- a/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java +++ b/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java @@ -47,7 +47,7 @@ public class DecimalFormat { ); - // used both in the picture string, and in the formatted number + // Markers: used in the picture string to identify active elements public 
final int decimalSeparator; public final int exponentSeparator; public final int groupingSeparator; @@ -55,18 +55,38 @@ public class DecimalFormat { public final int perMille; public final int zeroDigit; - // used in the picture string + // used in the picture string only public final int digit; public final int patternSeparator; - //used in the result of formatting the number, but not in the picture string + // used in the result of formatting the number, but not in the picture string public final String infinity; public final String NaN; public final int minusSign; + // XQ4 renditions: output strings for properties that support char:rendition. + // When marker != rendition, the marker is used for picture parsing and the + // rendition string appears in the formatted output. + public final String decimalSeparatorRendition; + public final String exponentSeparatorRendition; + public final String groupingSeparatorRendition; + public final String percentRendition; + public final String perMilleRendition; + public DecimalFormat(final int decimalSeparator, final int exponentSeparator, final int groupingSeparator, final int percent, final int perMille, final int zeroDigit, final int digit, final int patternSeparator, final String infinity, final String NaN, final int minusSign) { + this(decimalSeparator, exponentSeparator, groupingSeparator, percent, perMille, + zeroDigit, digit, patternSeparator, infinity, NaN, minusSign, + null, null, null, null, null); + } + + public DecimalFormat(final int decimalSeparator, final int exponentSeparator, final int groupingSeparator, + final int percent, final int perMille, final int zeroDigit, final int digit, + final int patternSeparator, final String infinity, final String NaN, final int minusSign, + final String decimalSeparatorRendition, final String exponentSeparatorRendition, + final String groupingSeparatorRendition, final String percentRendition, + final String perMilleRendition) { this.decimalSeparator = decimalSeparator; 
this.exponentSeparator = exponentSeparator; this.groupingSeparator = groupingSeparator; @@ -78,5 +98,11 @@ public DecimalFormat(final int decimalSeparator, final int exponentSeparator, fi this.infinity = infinity; this.NaN = NaN; this.minusSign = minusSign; + // Renditions default to the marker character as a string + this.decimalSeparatorRendition = decimalSeparatorRendition != null ? decimalSeparatorRendition : new String(Character.toChars(decimalSeparator)); + this.exponentSeparatorRendition = exponentSeparatorRendition != null ? exponentSeparatorRendition : new String(Character.toChars(exponentSeparator)); + this.groupingSeparatorRendition = groupingSeparatorRendition != null ? groupingSeparatorRendition : new String(Character.toChars(groupingSeparator)); + this.percentRendition = percentRendition != null ? percentRendition : new String(Character.toChars(percent)); + this.perMilleRendition = perMilleRendition != null ? perMilleRendition : new String(Character.toChars(perMille)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java b/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java index 3495fed460f..a67eaee3544 100644 --- a/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java @@ -90,7 +90,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc while(next != null) { context.proceed(this, builder); if (next.getType() == Type.ATTRIBUTE || next.getType() == Type.NAMESPACE) - {throw new XPathException(this, "Found a node of type " + + {throw new XPathException(this, ErrorCodes.XPTY0004, "Found a node of type " + Type.getTypeName(next.getType()) + " inside a document constructor");} // if item is an atomic value, collect the string values of all // following atomic values and seperate them by a space. 
diff --git a/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java b/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java index 168c2da95a6..583a4afc9ef 100644 --- a/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java @@ -99,7 +99,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc final Sequence nameSeq = qnameExpr.eval(contextSequence, contextItem); if(!nameSeq.hasOne()) - {throw new XPathException(this, "The name expression should evaluate to a single value");} + {throw new XPathException(this, ErrorCodes.XPTY0004, "The name expression should evaluate to a single value");} final Item qnItem = nameSeq.itemAt(0); QName qn; diff --git a/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java b/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java index 23226a155f2..4205d01484f 100644 --- a/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java +++ b/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java @@ -176,6 +176,9 @@ public class ErrorCodes { public static final ErrorCode FORX0002 = new W3CErrorCode("FORX0002", "Invalid regular expression."); public static final ErrorCode FORX0003 = new W3CErrorCode("FORX0003", "Regular expression matches zero-length string."); public static final ErrorCode FORX0004 = new W3CErrorCode("FORX0004", "Invalid replacement string."); + public static final ErrorCode FOCV0001 = new W3CErrorCode("FOCV0001", "CSV quote error."); + public static final ErrorCode FOCV0002 = new W3CErrorCode("FOCV0002", "Invalid CSV delimiter."); + public static final ErrorCode FOCV0003 = new W3CErrorCode("FOCV0003", "Conflicting CSV delimiters."); public static final ErrorCode FOTY0012 = new W3CErrorCode("FOTY0012", "Argument node does not have a typed value."); public static final ErrorCode FOTY0013 = new W3CErrorCode("FOTY0013", "The argument to fn:data() contains 
a function item."); @@ -211,6 +214,7 @@ public class ErrorCodes { public static final ErrorCode FTDY0020 = new W3CErrorCode("FTDY0020", ""); public static final ErrorCode FODC0006 = new W3CErrorCode("FODC0006", "String passed to fn:parse-xml is not a well-formed XML document."); + public static final ErrorCode FODC0011 = new W3CErrorCode("FODC0011", "HTML parsing error."); public static final ErrorCode FOAP0001 = new W3CErrorCode("FOAP0001", "Wrong number of arguments"); @@ -241,6 +245,10 @@ public class ErrorCodes { public static final ErrorCode FOXT0004 = new W3CErrorCode("FOXT0004", "XSLT transformation has been disabled"); public static final ErrorCode FOXT0006 = new W3CErrorCode("FOXT0006", "XSLT output contains non-accepted characters"); + // Invisible XML errors + public static final ErrorCode FOIX0001 = new W3CErrorCode("FOIX0001", "Invalid ixml grammar"); + public static final ErrorCode FOIX0002 = new W3CErrorCode("FOIX0002", "ixml parse error"); + public static final ErrorCode XTSE0165 = new W3CErrorCode("XTSE0165","It is a static error if the processor is not able to retrieve the resource identified by the URI reference [ in the href attribute of xsl:include or xsl:import] , or if the resource that is retrieved does not contain a stylesheet module conforming to this specification."); /* eXist specific XQuery and XPath errors diff --git a/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java b/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java index adcf7d3d5cb..a1602539964 100644 --- a/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java +++ b/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java @@ -245,7 +245,14 @@ private static CastExpression castExpression(XQueryContext context, ErrorCodes.XPST0017, "Wrong number of arguments for constructor function"); } final Expression arg = params.getFirst(); - final int code = Type.getType(qname); + final int code; + try { + code = Type.getType(qname); + } catch (final 
XPathException e) { + // Unknown type name in xs: namespace → XPST0017 (no such function) + throw new XPathException(ast.getLine(), ast.getColumn(), + ErrorCodes.XPST0017, "Unknown constructor function: " + qname.getStringValue()); + } final CastExpression castExpr = new CastExpression(context, arg, code, Cardinality.ZERO_OR_ONE); castExpr.setLocation(ast.getLine(), ast.getColumn()); return castExpr; diff --git a/exist-core/src/main/java/org/exist/xquery/RangeSequence.java b/exist-core/src/main/java/org/exist/xquery/RangeSequence.java index c23c663067e..eb3ecfa6507 100644 --- a/exist-core/src/main/java/org/exist/xquery/RangeSequence.java +++ b/exist-core/src/main/java/org/exist/xquery/RangeSequence.java @@ -21,8 +21,6 @@ */ package org.exist.xquery; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.exist.dom.persistent.NodeSet; import org.exist.xquery.value.AbstractSequence; import org.exist.xquery.value.IntegerValue; @@ -32,18 +30,40 @@ import org.exist.xquery.value.SequenceIterator; import org.exist.xquery.value.Type; -import java.math.BigInteger; - +/** + * An immutable, lazy sequence representing an integer range (start to end). + * Stores only the start and end values as primitive longs — no intermediate + * IntegerValue objects are created until accessed. Operations like count(), + * isEmpty(), itemAt(), and subsequence() are O(1). 
+ */ public class RangeSequence extends AbstractSequence { - private final static Logger LOG = LogManager.getLogger(AbstractSequence.class); - - private final IntegerValue start; - private final IntegerValue end; + private final long start; + private final long end; + private final long size; public RangeSequence(final IntegerValue start, final IntegerValue end) { + this(start.getLong(), end.getLong()); + } + + public RangeSequence(final long start, final long end) { this.start = start; this.end = end; + if (start <= end) { + final long diff = end - start; + // Saturate at Long.MAX_VALUE: diff < 0 means end - start wrapped; diff == Long.MAX_VALUE means diff + 1 would wrap + this.size = (diff >= 0 && diff < Long.MAX_VALUE) ? diff + 1 : Long.MAX_VALUE; + } else { + this.size = 0; + } + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; } @Override @@ -62,16 +82,16 @@ public int getItemType() { @Override public SequenceIterator iterate() { - return new RangeSequenceIterator(start.getLong(), end.getLong()); + return new RangeSequenceIterator(start, end); } @Override public SequenceIterator unorderedIterator() { - return new RangeSequenceIterator(start.getLong(), end.getLong()); + return new RangeSequenceIterator(start, end); } public SequenceIterator iterateInReverse() { - return new ReverseRangeSequenceIterator(start.getLong(), end.getLong()); + return new ReverseRangeSequenceIterator(start, end); } private static class RangeSequenceIterator implements SequenceIterator { @@ -148,39 +168,30 @@ public long skip(final long n) { @Override public long getItemCountLong() { - if (start.compareTo(end) > 0) { - return 0; - } - try { - return ((IntegerValue) end.minus(start)).getLong() + 1; - } catch (final XPathException e) { - LOG.warn("Unexpected exception when processing result of range expression: {}", e.getMessage(), e); - return 0; - } + return size; } @Override public boolean isEmpty() { - return getItemCountLong() == 0; + return size == 0; } @Override public boolean hasOne() { - return getItemCountLong() == 
1; + return size == 1; } @Override public boolean hasMany() { - return getItemCountLong() > 1; + return size > 1; } @Override public Cardinality getCardinality() { - final long itemCount = getItemCountLong(); - if (itemCount <= 0) { + if (size == 0) { return Cardinality.EMPTY_SEQUENCE; } - if (itemCount == 1) { + if (size == 1) { return Cardinality.EXACTLY_ONE; } return Cardinality._MANY; @@ -188,12 +199,26 @@ public Cardinality getCardinality() { @Override public Item itemAt(final int pos) { - if (pos < getItemCountLong()) { - return new IntegerValue(start.getLong() + pos); + if (pos >= 0 && pos < size) { + return new IntegerValue(start + pos); } return null; } + @Override + public boolean contains(final Item item) { + if (item instanceof IntegerValue) { + final long val = ((IntegerValue) item).getLong(); + return val >= start && val <= end; + } + return false; + } + + @Override + public boolean containsReference(final Item item) { + return false; // primitives don't have reference identity + } + @Override public NodeSet toNodeSet() throws XPathException { throw new XPathException(this, "Type error: the sequence cannot be converted into" + @@ -211,37 +236,7 @@ public void removeDuplicates() { } @Override - public boolean containsReference(final Item item) { - return start == item || end == item; - } - - @Override - public boolean contains(final Item item) { - if (item instanceof IntegerValue) { - try { - final BigInteger other = item.toJavaObject(BigInteger.class); - return other.compareTo(start.toJavaObject(BigInteger.class)) >= 0 - && other.compareTo(end.toJavaObject(BigInteger.class)) <= 0; - } catch (final XPathException e) { - LOG.warn(e.getMessage(), e); - return false; - } - } - return false; + public String toString() { + return "Range(" + start + " to " + end + ")"; } - - /** - * Generates a string representation of the Range Sequence. 
- * - * Range sequences can potentially be - * very large, so we generate a summary here - * rather than evaluating to generate a (possibly) - * huge sequence of objects. - * - * @return a string representation of the range sequence. - */ - @Override - public String toString() { - return "Range(" + start + " to " + end + ")"; - } } diff --git a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java index b3721c34179..4153e3cf5da 100644 --- a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java +++ b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java @@ -2730,6 +2730,13 @@ private ExternalModule compileOrBorrowModule(final String namespaceURI, final St * @return The compiled module, or null if the source is not a module * @throws XPathException if the module could not be loaded (XQST0059) or compiled (XPST0003) */ + /** + * Compile a module from a Source. Public wrapper for fn:load-xquery-module content option. 
+ */ + public @Nullable ExternalModule compileModuleFromSource(final String namespaceURI, final Source source) throws XPathException { + return compileModule(namespaceURI, null, "content", source); + } + private @Nullable ExternalModule compileModule(String namespaceURI, final String prefix, final String location, final Source source) throws XPathException { if (LOG.isDebugEnabled()) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayBuild.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayBuild.java new file mode 100644 index 00000000000..bcf73834e61 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayBuild.java @@ -0,0 +1,87 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * array:build($seq, $fn?) — Build array from sequence with optional mapping function. 
+ */ +public class ArrayBuild extends BasicFunction { + + private AnalyzeContextInfo cachedContextInfo; + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("build", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Builds an array from the items of a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The resulting array")), + new FunctionSignature( + new QName("build", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Builds an array by applying a function to each item of a sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The function to apply") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The resulting array")) + }; + + public ArrayBuild(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final List members = new ArrayList<>(); + + if (getArgumentCount() == 2) { + try (final FunctionReference fn = (FunctionReference) args[1].itemAt(0)) { + fn.analyze(cachedContextInfo); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + members.add(fn.evalFunction(null, null, new Sequence[]{item.toSequence()})); + } + } + } else { + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + 
members.add(i.nextItem().toSequence()); + } + } + + return new ArrayType(context, members); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java index ae46633a144..0559e1e473d 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayFunction.java @@ -32,8 +32,10 @@ import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.functions.fn.FunData; +import org.exist.xquery.value.BooleanValue; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.StringValue; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.IntegerValue; import org.exist.xquery.value.Sequence; @@ -276,6 +278,33 @@ public class ArrayFunction extends BasicFunction { ) ); + // --- XQuery 4.0 array functions --- + public static final FunctionSignature ARRAY_EMPTY = functionSignature( + Fn.EMPTY.fname, "Returns true if the supplied array is empty.", + returns(Type.BOOLEAN, "true if the array is empty"), + INPUT_ARRAY + ); + public static final FunctionSignature ARRAY_FOOT = functionSignature( + Fn.FOOT.fname, "Returns the last member of an array.", + returns(Type.ITEM, Cardinality.ZERO_OR_MORE, "The last member"), + INPUT_ARRAY + ); + public static final FunctionSignature ARRAY_TRUNK = functionSignature( + Fn.TRUNK.fname, "Returns all members except the last.", + RESULT_ARRAY, + INPUT_ARRAY + ); + public static final FunctionSignature ARRAY_ITEMS = functionSignature( + Fn.ITEMS.fname, "Returns the members of an array as a sequence.", + returns(Type.ITEM, Cardinality.ZERO_OR_MORE, "The members as a sequence"), + INPUT_ARRAY + ); + public static final FunctionSignature ARRAY_MEMBERS = functionSignature( + Fn.MEMBERS.fname, 
"Returns each member as a map with a 'value' key.", + returns(Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, "Sequence of member maps"), + INPUT_ARRAY + ); + private AnalyzeContextInfo cachedContextInfo; public ArrayFunction(XQueryContext context, FunctionSignature signature) { @@ -314,6 +343,11 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce case FOR_EACH_PAIR -> forEachPair(args); case SORT -> sort(args); case FLATTEN -> flatten(args); + case EMPTY -> arrayEmpty(args); + case FOOT -> foot(args); + case TRUNK -> trunk(args); + case ITEMS -> items(args); + case MEMBERS -> members(args); }; } @@ -493,6 +527,53 @@ private Sequence getFunction(Sequence arg, FunctionE fnMap = new HashMap<>(); private final String fname; diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexOf.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexOf.java new file mode 100644 index 00000000000..c57c93532cf --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexOf.java @@ -0,0 +1,63 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.fn.FunDeepEqual; +import org.exist.xquery.value.*; + +/** + * array:index-of($array, $target) — Returns positions of matching members. + */ +public class ArrayIndexOf extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("index-of", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns the positions of members that are deep-equal to the target.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The array to search"), + new FunctionParameterSequenceType("target", Type.ITEM, Cardinality.ZERO_OR_MORE, "The value to search for") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, "The 1-based positions")) + }; + + public ArrayIndexOf(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final Sequence target = args[1]; + final ValueSequence result = new ValueSequence(); + + for (int i = 0; i < array.getSize(); i++) { + final Sequence member = array.get(i); + if (FunDeepEqual.deepEqualsSeq(member, target, null)) { + result.add(new IntegerValue(this, i + 1)); + } + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexWhere.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexWhere.java new file mode 100644 index 00000000000..78d3b359b12 --- /dev/null +++ 
b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayIndexWhere.java @@ -0,0 +1,105 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements array:index-where (XQuery 4.0). + * + * Returns the positions in an input array of members that match a supplied + * predicate function, as a sequence of integers in ascending order. 
+ */ +public class ArrayIndexWhere extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("index-where", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns positions of array members matching the predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, + "The predicate function") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, + "positions of matching members")) + }; + + public ArrayIndexWhere(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int size = array.getSize(); + if (size == 0) { + return Sequence.EMPTY_SEQUENCE; + } + + try (final FunctionReference func = (FunctionReference) args[1].itemAt(0)) { + func.analyze(cachedContextInfo); + + final int arity = func.getSignature().getArgumentCount(); + final ValueSequence result = new ValueSequence(); + + for (int i = 0; i < size; i++) { + final Sequence member = array.get(i); + final Sequence[] funcArgs; + if (arity >= 2) { + funcArgs = new Sequence[] { member, new IntegerValue(this, i + 1) }; + } else { + funcArgs = new Sequence[] { member }; + } + + final Sequence predResult = func.evalFunction(null, null, funcArgs); + if (!predResult.isEmpty() && predResult.effectiveBooleanValue()) { + result.add(new IntegerValue(this, i + 1)); + } + } + return result; + } + } + + private org.exist.xquery.AnalyzeContextInfo cachedContextInfo = + new org.exist.xquery.AnalyzeContextInfo(); + + @Override + public void analyze(org.exist.xquery.AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new 
org.exist.xquery.AnalyzeContextInfo(contextInfo); + super.analyze(contextInfo); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java index a9eec0d3db9..f86dffdae10 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayModule.java @@ -43,28 +43,43 @@ public class ArrayModule extends AbstractInternalModule { public static final String PREFIX = "array"; private static final FunctionDef[] functions = functionDefs( - ArrayFunction.class, - ArrayFunction.SIZE, - ArrayFunction.GET, - ArrayFunction.PUT, - ArrayFunction.APPEND, - ArrayFunction.SUBARRAY_1, - ArrayFunction.SUBARRAY_2, - ArrayFunction.REMOVE, - ArrayFunction.INSERT_BEFORE, - ArrayFunction.HEAD, - ArrayFunction.TAIL, - ArrayFunction.REVERSE, - ArrayFunction.JOIN, - ArrayFunction.FOR_EACH, - ArrayFunction.FILTER, - ArrayFunction.FOLD_LEFT, - ArrayFunction.FOLD_RIGHT, - ArrayFunction.FOR_EACH_PAIR, - ArrayFunction.SORT_1, - ArrayFunction.SORT_2, - ArrayFunction.SORT_3, - ArrayFunction.FLATTEN + functionDefs(ArrayFunction.class, + ArrayFunction.SIZE, + ArrayFunction.GET, + ArrayFunction.PUT, + ArrayFunction.APPEND, + ArrayFunction.SUBARRAY_1, + ArrayFunction.SUBARRAY_2, + ArrayFunction.REMOVE, + ArrayFunction.INSERT_BEFORE, + ArrayFunction.HEAD, + ArrayFunction.TAIL, + ArrayFunction.REVERSE, + ArrayFunction.JOIN, + ArrayFunction.FOR_EACH, + ArrayFunction.FILTER, + ArrayFunction.FOLD_LEFT, + ArrayFunction.FOLD_RIGHT, + ArrayFunction.FOR_EACH_PAIR, + ArrayFunction.SORT_1, + ArrayFunction.SORT_2, + ArrayFunction.SORT_3, + ArrayFunction.FLATTEN, + // --- XQuery 4.0 --- + ArrayFunction.ARRAY_EMPTY, + ArrayFunction.ARRAY_FOOT, + ArrayFunction.ARRAY_TRUNK, + ArrayFunction.ARRAY_ITEMS, + ArrayFunction.ARRAY_MEMBERS + ), + functionDefs(ArraySlice.class, ArraySlice.signatures), + 
functionDefs(ArrayIndexWhere.class, ArrayIndexWhere.signatures), + functionDefs(ArraySortWith.class, ArraySortWith.signatures), + functionDefs(ArraySortBy.class, ArraySortBy.signatures), + functionDefs(ArrayBuild.class, ArrayBuild.signatures), + functionDefs(ArrayIndexOf.class, ArrayIndexOf.signatures), + functionDefs(ArrayOfMembers.class, ArrayOfMembers.signatures), + functionDefs(ArraySplit.class, ArraySplit.signatures) ); public ArrayModule(Map> parameters) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayOfMembers.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayOfMembers.java new file mode 100644 index 00000000000..7e0ef9d7cab --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArrayOfMembers.java @@ -0,0 +1,62 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * array:of-members($input as map(xs:string, item()*)*) — Construct array from member maps. + * Inverse of array:members. + */ +public class ArrayOfMembers extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("of-members", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Constructs an array from a sequence of member maps (each with a 'value' key).", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, "The member maps") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The resulting array")) + }; + + public ArrayOfMembers(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final List members = new ArrayList<>(); + for (final SequenceIterator i = args[0].iterate(); i.hasNext(); ) { + final AbstractMapType map = (AbstractMapType) i.nextItem(); + final Sequence value = map.get(new StringValue("value")); + members.add(value != null ? 
value : Sequence.EMPTY_SEQUENCE); + } + return new ArrayType(context, members); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySlice.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySlice.java new file mode 100644 index 00000000000..e5037030e97 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySlice.java @@ -0,0 +1,145 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements array:slice (XQuery 4.0). 
+ * + * Returns an array containing selected members of a supplied input array + * based on their position. Supports negative indexing and step values + * (Python-style slicing with 1-based indexing). + */ +public class ArraySlice extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")), + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")), + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position"), + new FunctionParameterSequenceType("end", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The end position") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")), + new FunctionSignature( + new QName("slice", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Returns an array containing selected members based on position.", + new SequenceType[] { 
+ new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("start", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The start position"), + new FunctionParameterSequenceType("end", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The end position"), + new FunctionParameterSequenceType("step", Type.INTEGER, Cardinality.ZERO_OR_ONE, "The step value") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sliced array")) + }; + + public ArraySlice(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int count = array.getSize(); + + if (count == 0) { + return new ArrayType(this, context, new ArrayList<>()); + } + + // Resolve start + int s; + if (args.length < 2 || args[1].isEmpty() || ((IntegerValue) args[1].itemAt(0)).getLong() == 0) { + s = 1; + } else { + final long sv = ((IntegerValue) args[1].itemAt(0)).getLong(); + s = (int) (sv < 0 ? count + sv + 1 : sv); + } + + // Resolve end + int e; + if (args.length < 3 || args[2].isEmpty() || ((IntegerValue) args[2].itemAt(0)).getLong() == 0) { + e = count; + } else { + final long ev = ((IntegerValue) args[2].itemAt(0)).getLong(); + e = (int) (ev < 0 ? count + ev + 1 : ev); + } + + // Resolve step + int step; + if (args.length < 4 || args[3].isEmpty() || ((IntegerValue) args[3].itemAt(0)).getLong() == 0) { + step = (e >= s) ? 
1 : -1; + } else { + step = (int) ((IntegerValue) args[3].itemAt(0)).getLong(); + } + + // Handle negative step: reverse array and recurse with negated positions + if (step < 0) { + final ArrayType reversed = array.reverse(); + final Sequence[] newArgs = new Sequence[4]; + newArgs[0] = reversed; + newArgs[1] = new IntegerValue(this, -s); + newArgs[2] = new IntegerValue(this, -e); + newArgs[3] = new IntegerValue(this, -step); + return eval(newArgs, contextSequence); + } + + // Positive step: select members + final List result = new ArrayList<>(); + for (int pos = s; pos <= e && pos <= count; pos += step) { + if (pos >= 1) { + result.add(array.get(pos - 1)); + } + } + return new ArrayType(this, context, result); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortBy.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortBy.java new file mode 100644 index 00000000000..bf16e1d9f6a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortBy.java @@ -0,0 +1,215 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.fn.FunCompare; +import org.exist.xquery.functions.fn.FunData; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; +import org.exist.xquery.NamedFunctionReference; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements array:sort-by (XQuery 4.0). + * + * Sorts a supplied array based on the value of sort keys supplied as + * record (map) specifications with optional key, collation, and order fields. 
+ */ +public class ArraySortBy extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("sort-by", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Sorts the array based on sort key specifications.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("keys", Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, + "Sort key records with optional key, collation, and order fields") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sorted array")) + }; + + private AnalyzeContextInfo cachedContextInfo = new AnalyzeContextInfo(); + + public ArraySortBy(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int size = array.getSize(); + if (size <= 1) { + return array; + } + + final Sequence keys = args[1]; + + // Parse sort key specifications + final List sortKeys = new ArrayList<>(); + if (keys.isEmpty()) { + final SortKey defaultKey = new SortKey(); + defaultKey.collator = context.getDefaultCollator(); + sortKeys.add(defaultKey); + } else { + for (final SequenceIterator ki = keys.iterate(); ki.hasNext(); ) { + final AbstractMapType keyMap = (AbstractMapType) ki.nextItem(); + sortKeys.add(parseSortKey(keyMap)); + } + } + + // Pre-compute sort keys for each member + final Sequence[][] keyValues = new Sequence[size][sortKeys.size()]; + for (int idx = 0; idx < size; idx++) { + final Sequence member = array.get(idx); + for (int k = 0; k < sortKeys.size(); k++) { 
+ final SortKey sk = sortKeys.get(k); + if (sk.keyFunction != null) { + keyValues[idx][k] = sk.keyFunction.evalFunction(null, null, + new Sequence[]{member}); + } else { + // Default: atomize members + final ValueSequence atomized = new ValueSequence(); + for (final SequenceIterator mi = member.iterate(); mi.hasNext(); ) { + atomized.add(mi.nextItem().atomize()); + } + keyValues[idx][k] = atomized; + } + } + } + + // Build index array for stable sort + final Integer[] indices = new Integer[size]; + for (int i = 0; i < indices.length; i++) { + indices[i] = i; + } + + try { + java.util.Arrays.sort(indices, (a, b) -> { + try { + for (int k = 0; k < sortKeys.size(); k++) { + final SortKey sk = sortKeys.get(k); + final int cmp = compareKeys(keyValues[a][k], keyValues[b][k], sk.collator); + if (cmp != 0) { + return sk.descending ? -cmp : cmp; + } + } + return 0; + } catch (final XPathException e) { + throw new RuntimeException(e); + } + }); + } catch (final RuntimeException e) { + if (e.getCause() instanceof XPathException) { + throw (XPathException) e.getCause(); + } + throw e; + } + + // Build result array + final List resultMembers = new ArrayList<>(size); + for (final int idx : indices) { + resultMembers.add(array.get(idx)); + } + return new ArrayType(this, context, resultMembers); + } + + private int compareKeys(final Sequence a, final Sequence b, final Collator collator) throws XPathException { + final boolean emptyA = a.isEmpty(); + final boolean emptyB = b.isEmpty(); + if (emptyA && emptyB) return 0; + if (emptyA) return -1; + if (emptyB) return 1; + + final int len = Math.min(a.getItemCount(), b.getItemCount()); + for (int i = 0; i < len; i++) { + final AtomicValue va = a.itemAt(i).atomize(); + final AtomicValue vb = b.itemAt(i).atomize(); + final int cmp = FunCompare.compare(va, vb, collator); + if (cmp != 0) return cmp; + } + return Integer.compare(a.getItemCount(), b.getItemCount()); + } + + private SortKey parseSortKey(final AbstractMapType map) throws 
XPathException { + final SortKey sk = new SortKey(); + + final Sequence keySeq = map.get(new StringValue(this, "key")); + if (keySeq != null && !keySeq.isEmpty()) { + final Item keyItem = keySeq.itemAt(0); + if (!(keyItem instanceof FunctionReference)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Expected function reference for 'key', got " + Type.getTypeName(keyItem.getType())); + } + sk.keyFunction = (FunctionReference) keyItem; + sk.keyFunction.analyze(cachedContextInfo); + } + + final Sequence collSeq = map.get(new StringValue(this, "collation")); + if (collSeq != null && !collSeq.isEmpty()) { + sk.collator = context.getCollator(collSeq.getStringValue(), ErrorCodes.FOCH0002); + } else { + sk.collator = context.getDefaultCollator(); + } + + final Sequence orderSeq = map.get(new StringValue(this, "order")); + if (orderSeq != null && !orderSeq.isEmpty()) { + sk.descending = "descending".equals(orderSeq.getStringValue()); + } + + return sk; + } + + private static class SortKey { + FunctionReference keyFunction; + Collator collator; + boolean descending; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortWith.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortWith.java new file mode 100644 index 00000000000..06f48b5fd44 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySortWith.java @@ -0,0 +1,144 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements array:sort-with (XQuery 4.0). + * + * Sorts a supplied array according to the order induced by one or more + * supplied comparator functions. Sort is stable. 
+ */ +public class ArraySortWith extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("sort-with", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Sorts the array using the supplied comparator function(s).", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The input array"), + new FunctionParameterSequenceType("comparators", Type.FUNCTION, Cardinality.ONE_OR_MORE, + "One or more comparator functions (fn(item()*, item()*) as xs:integer)") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "the sorted array")) + }; + + private AnalyzeContextInfo cachedContextInfo = new AnalyzeContextInfo(); + + public ArraySortWith(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final int size = array.getSize(); + if (size <= 1) { + return array; + } + + // Collect comparator functions + final Sequence comparatorsSeq = args[1]; + final List comparators = new ArrayList<>(comparatorsSeq.getItemCount()); + for (final SequenceIterator it = comparatorsSeq.iterate(); it.hasNext(); ) { + final FunctionReference ref = (FunctionReference) it.nextItem(); + ref.analyze(cachedContextInfo); + comparators.add(ref); + } + + // Build list of (index, member) to sort + final List members = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + members.add(new IndexedMember(i, array.get(i))); + } + + // Stable sort using comparator chain + try { + members.sort((a, b) -> { + try { + for (final FunctionReference comp : comparators) { + 
final Sequence[] funcArgs = new Sequence[] { a.value, b.value }; + final Sequence result = comp.evalFunction(null, null, funcArgs); + if (result.isEmpty()) { + continue; + } + final long cmp = ((IntegerValue) result.itemAt(0).convertTo(Type.INTEGER)).getLong(); + if (cmp != 0) { + return cmp < 0 ? -1 : 1; + } + } + return 0; + } catch (final XPathException e) { + throw new RuntimeException(e); + } + }); + } catch (final RuntimeException e) { + if (e.getCause() instanceof XPathException) { + throw (XPathException) e.getCause(); + } + throw e; + } + + // Build result array + final List resultMembers = new ArrayList<>(size); + for (final IndexedMember m : members) { + resultMembers.add(m.value); + } + + return new ArrayType(this, context, resultMembers); + } + + private static class IndexedMember { + final int index; + final Sequence value; + + IndexedMember(int index, Sequence value) { + this.index = index; + this.value = value; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySplit.java b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySplit.java new file mode 100644 index 00000000000..25d231d64d7 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/array/ArraySplit.java @@ -0,0 +1,58 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.array; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +import java.util.Collections; + +/** + * array:split($array) — Split array into sequence of single-member arrays. + */ +public class ArraySplit extends BasicFunction { + + public static final FunctionSignature[] signatures = { + new FunctionSignature( + new QName("split", ArrayModule.NAMESPACE_URI, ArrayModule.PREFIX), + "Splits an array into a sequence of single-member arrays.", + new SequenceType[] { + new FunctionParameterSequenceType("array", Type.ARRAY_ITEM, Cardinality.EXACTLY_ONE, "The array to split") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of single-member arrays")) + }; + + public ArraySplit(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final ArrayType array = (ArrayType) args[0].itemAt(0); + final ValueSequence result = new ValueSequence(array.getSize()); + for (int i = 0; i < array.getSize(); i++) { + result.add(new ArrayType(context, Collections.singletonList(array.get(i)))); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvFunctions.java new file mode 100644 index 00000000000..ea4a275ba56 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvFunctions.java @@ -0,0 +1,619 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * 
+ * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.apache.commons.io.IOUtils; +import org.exist.dom.QName; +import org.exist.dom.memtree.MemTreeBuilder; +import org.exist.security.PermissionDeniedException; +import org.exist.source.FileSource; +import org.exist.source.Source; +import org.exist.source.SourceFactory; +import org.exist.xquery.*; +import org.exist.xquery.functions.array.ArrayType; +import io.lacuna.bifurcan.IEntry; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * Implements XQuery 4.0 CSV functions: + * fn:csv-to-arrays, fn:parse-csv, fn:csv-to-xml, fn:csv-doc. 
+ */ +public class CsvFunctions extends BasicFunction { + + // XQ4 namespace for CSV XML output + private static final String CSV_NS = "http://www.w3.org/2005/xpath-functions"; + + // fn:csv-to-arrays signatures + public static final FunctionSignature[] FN_CSV_TO_ARRAYS = { + new FunctionSignature( + new QName("csv-to-arrays", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a sequence of arrays.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of arrays, one per row")), + new FunctionSignature( + new QName("csv-to-arrays", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a sequence of arrays, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, "A sequence of arrays, one per row")) + }; + + // fn:parse-csv signatures + public static final FunctionSignature[] FN_PARSE_CSV = { + new FunctionSignature( + new QName("parse-csv", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a map.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")), + new FunctionSignature( + new QName("parse-csv", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as a map, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, 
Cardinality.ZERO_OR_ONE, "The CSV string to parse"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")) + }; + + // fn:csv-to-xml signatures + public static final FunctionSignature[] FN_CSV_TO_XML = { + new FunctionSignature( + new QName("csv-to-xml", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as an XML document.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, "An XML document representing the CSV data")), + new FunctionSignature( + new QName("csv-to-xml", Function.BUILTIN_FUNCTION_NS), + "Parses a string as CSV data and returns the result as an XML document, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("csv", Type.STRING, Cardinality.ZERO_OR_ONE, "The CSV string to parse"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE, "An XML document representing the CSV data")) + }; + + // fn:csv-doc signatures + public static final FunctionSignature[] FN_CSV_DOC = { + new FunctionSignature( + new QName("csv-doc", Function.BUILTIN_FUNCTION_NS), + "Reads CSV data from the specified URI and returns the result as a map.", + new SequenceType[]{ + new FunctionParameterSequenceType("uri", Type.STRING, Cardinality.ZERO_OR_ONE, "The URI of the CSV resource") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")), + new FunctionSignature( + new QName("csv-doc", Function.BUILTIN_FUNCTION_NS), + "Reads CSV data from the specified URI and returns the 
result as a map, using the specified options.", + new SequenceType[]{ + new FunctionParameterSequenceType("uri", Type.STRING, Cardinality.ZERO_OR_ONE, "The URI of the CSV resource"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "Parsing options") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "A map with columns, column-index, rows, and get")) + }; + + public CsvFunctions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("csv-doc")) { + return evalCsvDoc(args); + } + + // Empty sequence input returns empty + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final String csv = args[0].getStringValue(); + final CsvParser.CsvOptions options = parseOptions(args); + + if (isCalledAs("csv-to-arrays")) { + return evalCsvToArrays(csv, options); + } else if (isCalledAs("parse-csv")) { + return evalParseCsv(csv, options); + } else if (isCalledAs("csv-to-xml")) { + return evalCsvToXml(csv, options); + } + throw new XPathException(this, ErrorCodes.XPST0017, "Unknown CSV function: " + getSignature().getName().getLocalPart()); + } + + // ==================== fn:csv-to-arrays ==================== + + private Sequence evalCsvToArrays(final String csv, final CsvParser.CsvOptions options) throws XPathException { + options.validate(this); + final CsvParser parser = new CsvParser(options, this); + final ValueSequence result = new ValueSequence(); + + parser.parse(csv, new CsvParser.CsvConverter() { + @Override + public void header(final List fields) { + // Header row is also returned as an array in csv-to-arrays + // (per XQ4 spec: "If header is true, the first row is treated as a header + // but still appears in the output") + // Actually per spec: if header=true, the header row is NOT included + // in the result of 
csv-to-arrays. + } + + @Override + public void record(final List fields) throws XPathException { + result.add(fieldsToArray(fields)); + } + + @Override + public void finish() { + } + }); + return result; + } + + // ==================== fn:parse-csv ==================== + + private Sequence evalParseCsv(final String csv, final CsvParser.CsvOptions options) throws XPathException { + options.validate(this); + final CsvParser parser = new CsvParser(options, this); + final List> allRows = new ArrayList<>(); + final List[] headerHolder = new List[]{null}; + + parser.parse(csv, new CsvParser.CsvConverter() { + @Override + public void header(final List fields) { + headerHolder[0] = fields; + } + + @Override + public void record(final List fields) { + allRows.add(fields); + } + + @Override + public void finish() { + } + }); + + // Explicit header from options overrides parsed header + final List effectiveHeader = options.explicitHeader != null + ? options.explicitHeader : headerHolder[0]; + + return buildParseCsvResult(effectiveHeader, allRows, options); + } + + private Sequence buildParseCsvResult(final List header, final List> rows, + final CsvParser.CsvOptions options) throws XPathException { + final MapType result = new MapType(this, context); + + // "columns" - sequence of column names (empty sequence if no header) + final Sequence columns; + if (header != null) { + final ValueSequence colSeq = new ValueSequence(header.size()); + for (final String h : header) { + colSeq.add(new StringValue(this, h)); + } + columns = colSeq; + } else { + columns = Sequence.EMPTY_SEQUENCE; + } + + // "column-index" - map from column name to 1-based position + // Empty names are excluded; duplicate names map to first occurrence + final MapType columnIndex = new MapType(this, context); + MapType colIdxResult = columnIndex; + if (header != null) { + final java.util.Set seen = new java.util.HashSet<>(); + for (int i = 0; i < header.size(); i++) { + final String name = header.get(i); + if 
(!name.isEmpty() && seen.add(name)) { + colIdxResult = (MapType) colIdxResult.put(new StringValue(this, name), + new IntegerValue(this, i + 1)); + } + } + } + + // "rows" - sequence of arrays + final ValueSequence rowSeq = new ValueSequence(rows.size()); + for (final List row : rows) { + rowSeq.add(fieldsToArray(row)); + } + + // Build the result map + MapType map = (MapType) result.put(new StringValue(this, "columns"), columns); + map = (MapType) map.put(new StringValue(this, "column-index"), colIdxResult); + map = (MapType) map.put(new StringValue(this, "rows"), rowSeq); + + // "get" - accessor function: fn($row as xs:integer, $column as item()) as xs:string + // $column can be an integer (1-based) or a string (column name) + final UserDefinedFunction getFunc = new UserDefinedFunction(context, + new FunctionSignature( + new QName("get", Function.BUILTIN_FUNCTION_NS), + null, + new SequenceType[]{ + new FunctionParameterSequenceType("row", Type.INTEGER, Cardinality.EXACTLY_ONE, "Row number (1-based)"), + new FunctionParameterSequenceType("column", Type.ITEM, Cardinality.EXACTLY_ONE, "Column number (1-based) or column name") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_ONE, "The field value"))); + getFunc.addVariable("row"); + getFunc.addVariable("column"); + getFunc.setFunctionBody(new CsvGetExpression(context, rowSeq, header)); + final FunctionCall getCall = new FunctionCall(context, getFunc); + getCall.setLocation(getLine(), getColumn()); + final FunctionReference getFuncRef = new FunctionReference(this, getCall); + map = (MapType) map.put(new StringValue(this, "get"), getFuncRef); + + return map; + } + + // ==================== fn:csv-to-xml ==================== + + private Sequence evalCsvToXml(final String csv, final CsvParser.CsvOptions options) throws XPathException { + options.validate(this); + final CsvParser parser = new CsvParser(options, this); + + final List[] headerHolder = new List[]{null}; + final List> allRecords = new 
ArrayList<>(); + + parser.parse(csv, new CsvParser.CsvConverter() { + @Override + public void header(final List fields) { + headerHolder[0] = fields; + } + + @Override + public void record(final List fields) { + allRecords.add(fields); + } + + @Override + public void finish() { + } + }); + + // Explicit header from options overrides parsed header + final List effectiveHeader = options.explicitHeader != null + ? options.explicitHeader : headerHolder[0]; + + context.pushDocumentContext(); + try { + final MemTreeBuilder builder = context.getDocumentBuilder(); + + builder.startElement(new QName("csv", CSV_NS), null); + + // Write columns element only if headers are present + if (effectiveHeader != null) { + builder.startElement(new QName("columns", CSV_NS), null); + for (final String col : effectiveHeader) { + builder.startElement(new QName("column", CSV_NS), null); + builder.characters(col); + builder.endElement(); + } + builder.endElement(); // + } + + // Write rows + builder.startElement(new QName("rows", CSV_NS), null); + for (final List record : allRecords) { + builder.startElement(new QName("row", CSV_NS), null); + // A row with a single empty field is an empty row (no field elements) + final boolean isEmptyRow = record.size() == 1 && record.get(0).isEmpty(); + if (!isEmptyRow) { + for (int f = 0; f < record.size(); f++) { + final String field = record.get(f); + builder.startElement(new QName("field", CSV_NS), null); + if (effectiveHeader != null && f < effectiveHeader.size() + && !effectiveHeader.get(f).isEmpty()) { + builder.addAttribute(new QName("column", null, null), effectiveHeader.get(f)); + } + if (!field.isEmpty()) { + builder.characters(field); + } + builder.endElement(); + } + } + builder.endElement(); // + } + builder.endElement(); // + + builder.endElement(); // + + return builder.getDocument(); + } finally { + context.popDocumentContext(); + } + } + + // ==================== fn:csv-doc ==================== + + private Sequence evalCsvDoc(final 
Sequence[] args) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final String uri = args[0].getStringValue(); + + // Read the CSV content from the URI (same approach as fn:unparsed-text) + final String csvContent; + try { + final URI parsedUri = new URI(uri); + if (parsedUri.getFragment() != null) { + throw new XPathException(this, ErrorCodes.FODC0005, + "URI may not contain a fragment identifier: " + uri); + } + final Source source = SourceFactory.getSource(context.getBroker(), "", parsedUri.toASCIIString(), false); + if (source == null) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Could not find CSV resource: " + uri); + } + if (source instanceof FileSource && !context.getBroker().getCurrentSubject().hasDbaRole()) { + throw new PermissionDeniedException("non-dba user not allowed to read from file system"); + } + final StringWriter output = new StringWriter(); + try (final InputStream is = source.getInputStream()) { + IOUtils.copy(is, output, StandardCharsets.UTF_8); + } + csvContent = output.toString(); + } catch (final IOException | PermissionDeniedException | URISyntaxException e) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Error reading CSV resource: " + uri + " - " + e.getMessage()); + } + + final CsvParser.CsvOptions options = parseOptions(args); + return evalParseCsv(csvContent, options); + } + + // ==================== Shared utilities ==================== + + private CsvParser.CsvOptions parseOptions(final Sequence[] args) throws XPathException { + final CsvParser.CsvOptions options = new CsvParser.CsvOptions(); + if (args.length < 2 || args[1].isEmpty()) { + return options; + } + + final AbstractMapType map = (AbstractMapType) args[1].itemAt(0); + + // field-delimiter + final Sequence fdSeq = map.get(new StringValue(this, "field-delimiter")); + if (fdSeq != null && !fdSeq.isEmpty()) { + final String fd = fdSeq.getStringValue(); + if (fd.isEmpty()) { + throw new XPathException(this, 
ErrorCodes.FOCV0002, + "field-delimiter must be a single character"); + } + if (fd.codePointCount(0, fd.length()) != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "field-delimiter must be a single character, got: \"" + fd + "\""); + } + options.fieldDelimiter = fd.codePointAt(0); + } + + // row-delimiter + final Sequence rdSeq = map.get(new StringValue(this, "row-delimiter")); + if (rdSeq != null && !rdSeq.isEmpty()) { + if (rdSeq.getItemCount() != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "row-delimiter must be a single string, got " + rdSeq.getItemCount() + " items"); + } + final String rd = rdSeq.itemAt(0).getStringValue(); + if (rd.isEmpty() || rd.codePointCount(0, rd.length()) != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "row-delimiter must be a single character"); + } + options.rowDelimiter = rd.codePointAt(0); + } + + // quote-character + final Sequence qcSeq = map.get(new StringValue(this, "quote-character")); + if (qcSeq != null && !qcSeq.isEmpty()) { + final String qc = qcSeq.getStringValue(); + if (qc.isEmpty()) { + options.quoteChar = -1; // disable quoting + } else if (qc.codePointCount(0, qc.length()) != 1) { + throw new XPathException(this, ErrorCodes.FOCV0002, + "quote-character must be a single character or empty string"); + } else { + options.quoteChar = qc.codePointAt(0); + } + } + + // trim-whitespace + final Sequence twSeq = map.get(new StringValue(this, "trim-whitespace")); + if (twSeq != null && !twSeq.isEmpty()) { + options.trimWhitespace = twSeq.effectiveBooleanValue(); + } + + // header: boolean, "present", or sequence of explicit column names + final Sequence hdrSeq = map.get(new StringValue(this, "header")); + if (hdrSeq != null && !hdrSeq.isEmpty()) { + final Item hdrItem = hdrSeq.itemAt(0); + if (hdrItem.getType() == Type.BOOLEAN) { + options.hasHeader = hdrItem.toSequence().effectiveBooleanValue(); + } else if (hdrSeq.getItemCount() == 1) { + final String hdrStr = 
hdrItem.getStringValue(); + if ("true".equals(hdrStr) || "present".equals(hdrStr)) { + options.hasHeader = true; + } else if ("false".equals(hdrStr) || "absent".equals(hdrStr)) { + options.hasHeader = false; + } else { + // Single string → explicit column name + options.explicitHeader = new ArrayList<>(); + options.explicitHeader.add(hdrStr); + options.hasHeader = false; // don't consume first data row + } + } else { + // Multiple items → sequence of explicit column names + options.explicitHeader = new ArrayList<>(hdrSeq.getItemCount()); + for (int j = 0; j < hdrSeq.getItemCount(); j++) { + options.explicitHeader.add(hdrSeq.itemAt(j).getStringValue()); + } + options.hasHeader = false; // don't consume first data row + } + } + + // select-columns + final Sequence scSeq = map.get(new StringValue(this, "select-columns")); + if (scSeq != null && !scSeq.isEmpty()) { + final int count = scSeq.getItemCount(); + options.selectColumns = new int[count]; + for (int j = 0; j < count; j++) { + final int col = ((IntegerValue) scSeq.itemAt(j).convertTo(Type.INTEGER)).getInt(); + if (col < 1) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "select-columns values must be positive integers, got: " + col); + } + options.selectColumns[j] = col; + } + } + + // trim-rows + final Sequence trSeq = map.get(new StringValue(this, "trim-rows")); + if (trSeq != null && !trSeq.isEmpty()) { + options.trimRows = trSeq.effectiveBooleanValue(); + } + + // Validate no unknown option keys + final java.util.Set knownKeys = java.util.Set.of( + "field-delimiter", "row-delimiter", "quote-character", + "trim-whitespace", "header", "select-columns", "trim-rows"); + for (final IEntry entry : map) { + final String key = entry.key().getStringValue(); + if (!knownKeys.contains(key)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Unknown CSV option: '" + key + "'"); + } + } + + return options; + } + + private ArrayType fieldsToArray(final List fields) throws XPathException { + // XQ4 spec: 
a row with a single empty field produces an empty array + if (fields.size() == 1 && fields.get(0).isEmpty()) { + return new ArrayType(this, context, new ArrayList<>()); + } + final List items = new ArrayList<>(fields.size()); + for (final String field : fields) { + items.add(new StringValue(this, field)); + } + return new ArrayType(this, context, items); + } + + /** + * Expression body for the "get" accessor function in fn:parse-csv results. + * Implements fn($row as xs:integer, $column as xs:integer) as xs:string. + * Both row and column are 1-based indexes. + */ + private static class CsvGetExpression extends AbstractExpression { + + private final ValueSequence rows; + private final List header; + + public CsvGetExpression(final XQueryContext context, final ValueSequence rows, final List header) { + super(context); + this.rows = rows; + this.header = header; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence rowIdxSeq = context.resolveVariable("row").getValue(); + final Sequence colSeq = context.resolveVariable("column").getValue(); + + if (rowIdxSeq.isEmpty() || colSeq.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final int rowIdx = ((IntegerValue) rowIdxSeq.itemAt(0).convertTo(Type.INTEGER)).getInt(); + + if (rowIdx < 1 || rowIdx > rows.getItemCount()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Resolve column: integer index or string name + final Item colItem = colSeq.itemAt(0); + final int colIdx; + if (Type.subTypeOf(colItem.getType(), Type.INTEGER)) { + colIdx = ((IntegerValue) colItem.convertTo(Type.INTEGER)).getInt(); + } else { + // String column name — look up in header + final String colName = colItem.getStringValue(); + if (header == null) { + return Sequence.EMPTY_SEQUENCE; + } + int found = -1; + for (int i = 0; i < header.size(); i++) { + if (header.get(i).equals(colName)) { + found = i + 1; // 1-based + break; + } + } + if (found == -1) { + return 
Sequence.EMPTY_SEQUENCE; + } + colIdx = found; + } + + final ArrayType row = (ArrayType) rows.itemAt(rowIdx - 1); + if (colIdx < 1 || colIdx > row.getSize()) { + return Sequence.EMPTY_SEQUENCE; + } + + return row.get(colIdx - 1); + } + + @Override + public int returnsType() { + return Type.STRING; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + // no-op + } + + @Override + public void dump(final org.exist.xquery.util.ExpressionDumper dumper) { + dumper.display("[csv-get]"); + } + + @Override + public String toString() { + return "[csv-get]"; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvParser.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvParser.java new file mode 100644 index 00000000000..3b1524108bb --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/CsvParser.java @@ -0,0 +1,338 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Expression; +import org.exist.xquery.XPathException; + +import java.util.ArrayList; +import java.util.List; + +/** + * State-machine CSV parser following the XQuery 4.0 specification. + * Parses CSV text into records (rows) of fields using SAX-like callbacks. + * + * Options supported (per XQ4 spec): + * - field-delimiter (default: comma) + * - row-delimiter (default: CRLF/LF/CR) + * - quote-character (default: double-quote; empty string disables quoting) + * - trim-whitespace (default: false) + * - header (default: false; true or "present" means first row is header) + * - select-columns (default: all) + * - trim-rows (default: false; removes trailing empty rows) + */ +public class CsvParser { + + /** + * Callback interface for CSV parsing events. 
+ */ + public interface CsvConverter { + void header(List fields) throws XPathException; + void record(List fields) throws XPathException; + void finish() throws XPathException; + } + + private final int fieldDelimiter; + private final int rowDelimiter; + private final int quoteChar; + private final boolean trimWhitespace; + private final boolean hasHeader; + private final int[] selectColumns; + private final boolean trimRows; + private final Expression expression; + + public CsvParser(final CsvOptions options, final Expression expression) { + this.fieldDelimiter = options.fieldDelimiter; + this.rowDelimiter = options.rowDelimiter; + this.quoteChar = options.quoteChar; + this.trimWhitespace = options.trimWhitespace; + this.hasHeader = options.hasHeader; + this.selectColumns = options.selectColumns; + this.trimRows = options.trimRows; + this.expression = expression; + } + + /** + * Parse CSV text, calling the converter for each record. + */ + public void parse(final String input, final CsvConverter converter) throws XPathException { + final List> allRecords = new ArrayList<>(); + List currentRecord = new ArrayList<>(); + final StringBuilder field = new StringBuilder(); + + // State: FIELD_START, IN_UNQUOTED, IN_QUOTED, AFTER_QUOTED + int state = 0; // 0=field_start, 1=in_unquoted, 2=in_quoted, 3=after_quoted + int i = 0; + final int len = input.length(); + + while (i < len) { + final int cp = input.codePointAt(i); + final int cpLen = Character.charCount(cp); + + switch (state) { + case 0: // FIELD_START — beginning of a new field + if (cp == quoteChar && quoteChar != -1) { + state = 2; // start quoted field + i += cpLen; + } else if (cp == fieldDelimiter) { + currentRecord.add(finishField(field)); + field.setLength(0); + // remain in FIELD_START + i += cpLen; + } else if (isRowDelimiter(cp)) { + currentRecord.add(finishField(field)); + field.setLength(0); + allRecords.add(currentRecord); + currentRecord = new ArrayList<>(); + i += rowDelimiterLength(input, i, cp); + 
} else { + field.appendCodePoint(cp); + state = 1; // in unquoted field + i += cpLen; + } + break; + + case 1: // IN_UNQUOTED — inside an unquoted field + if (cp == quoteChar && quoteChar != -1) { + // Quote in middle of unquoted field → error + throw new XPathException(expression, ErrorCodes.FOCV0001, + "Quote character found in middle of unquoted field"); + } else if (cp == fieldDelimiter) { + currentRecord.add(finishField(field)); + field.setLength(0); + state = 0; + i += cpLen; + } else if (isRowDelimiter(cp)) { + currentRecord.add(finishField(field)); + field.setLength(0); + allRecords.add(currentRecord); + currentRecord = new ArrayList<>(); + state = 0; + i += rowDelimiterLength(input, i, cp); + } else { + field.appendCodePoint(cp); + i += cpLen; + } + break; + + case 2: // IN_QUOTED — inside a quoted field + if (cp == quoteChar) { + // Check for escaped quote (doubled) + if (i + cpLen < len && input.codePointAt(i + cpLen) == quoteChar) { + field.appendCodePoint(quoteChar); + i += cpLen * 2; + } else { + // End of quoted field + state = 3; // after closing quote + i += cpLen; + } + } else { + field.appendCodePoint(cp); + i += cpLen; + } + break; + + case 3: // AFTER_QUOTED — just saw closing quote + if (cp == fieldDelimiter) { + currentRecord.add(finishField(field)); + field.setLength(0); + state = 0; + i += cpLen; + } else if (isRowDelimiter(cp)) { + currentRecord.add(finishField(field)); + field.setLength(0); + allRecords.add(currentRecord); + currentRecord = new ArrayList<>(); + state = 0; + i += rowDelimiterLength(input, i, cp); + } else if (cp == ' ' || cp == '\t') { + // Whitespace after closing quote is allowed (ignored) + i += cpLen; + } else { + // Non-delimiter content after closing quote → error + throw new XPathException(expression, ErrorCodes.FOCV0001, + "Content after closing quote in CSV field"); + } + break; + } + } + + // Check for unterminated quotes + if (state == 2) { + throw new XPathException(expression, ErrorCodes.FOCV0001, + 
"Unterminated quoted field in CSV input"); + } + + // Handle last field/record (if input doesn't end with row delimiter). + // A trailing row delimiter does not create an additional empty record. + // With trim-whitespace, a trailing row delimiter followed by only whitespace + // also does not create an additional record. + if (!currentRecord.isEmpty() || state == 3) { + // We had field delimiters on this line or a quoted field — always add + currentRecord.add(finishField(field)); + allRecords.add(currentRecord); + } else if (field.length() > 0) { + final String finished = finishField(field); + if (!finished.isEmpty()) { + currentRecord.add(finished); + allRecords.add(currentRecord); + } + } + + // Trim trailing empty rows if requested + if (trimRows) { + while (!allRecords.isEmpty()) { + final List lastRow = allRecords.get(allRecords.size() - 1); + if (isEmptyRow(lastRow)) { + allRecords.remove(allRecords.size() - 1); + } else { + break; + } + } + + // Normalize column count: all rows trimmed/padded to match first row (or header) + if (!allRecords.isEmpty()) { + final int columnCount = allRecords.get(0).size(); + for (int r = 1; r < allRecords.size(); r++) { + final List row = allRecords.get(r); + if (row.size() > columnCount) { + allRecords.set(r, new ArrayList<>(row.subList(0, columnCount))); + } else { + while (row.size() < columnCount) { + row.add(""); + } + } + } + } + } + + // Process header and records + int startIdx = 0; + if (hasHeader && !allRecords.isEmpty()) { + // Headers are always trimmed (per XQ4 spec), regardless of trim-whitespace option + final List headerFields = allRecords.get(0); + final List trimmedHeader = new ArrayList<>(headerFields.size()); + for (final String h : headerFields) { + trimmedHeader.add(h.trim()); + } + converter.header(selectFields(trimmedHeader)); + startIdx = 1; + } + + for (int r = startIdx; r < allRecords.size(); r++) { + converter.record(selectFields(allRecords.get(r))); + } + + converter.finish(); + } + + private 
String finishField(final StringBuilder field) { + if (trimWhitespace) { + return field.toString().trim(); + } + return field.toString(); + } + + private boolean isRowDelimiter(final int cp) { + if (rowDelimiter == -1) { + // Auto-detect: CR, LF, or CRLF + return cp == '\n' || cp == '\r'; + } + return cp == rowDelimiter; + } + + private int rowDelimiterLength(final String input, final int pos, final int cp) { + if (rowDelimiter == -1) { + // Auto-detect: CRLF counts as one delimiter + if (cp == '\r' && pos + 1 < input.length() && input.charAt(pos + 1) == '\n') { + return 2; + } + return 1; + } + return Character.charCount(rowDelimiter); + } + + private List selectFields(final List fields) { + if (selectColumns == null) { + return fields; + } + final List selected = new ArrayList<>(selectColumns.length); + for (final int col : selectColumns) { + if (col >= 1 && col <= fields.size()) { + selected.add(fields.get(col - 1)); + } else { + selected.add(""); + } + } + return selected; + } + + private static boolean isEmptyRow(final List row) { + for (final String field : row) { + if (!field.isEmpty()) { + return false; + } + } + return true; + } + + /** + * Parsed CSV options from an XQuery map. + */ + public static class CsvOptions { + public int fieldDelimiter = ','; + public int rowDelimiter = -1; // -1 = auto-detect (CR/LF/CRLF) + public int quoteChar = '"'; + public boolean trimWhitespace = false; + public boolean hasHeader = false; + public List explicitHeader = null; // explicit column names from options + public int[] selectColumns = null; + public boolean trimRows = false; + + /** + * Validate options per the XQ4 spec. 
+ */ + public void validate(final Expression expression) throws XPathException { + // Field delimiter and quote character must be different + if (quoteChar != -1 && fieldDelimiter == quoteChar) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Field delimiter and quote character must be different"); + } + // Field delimiter and row delimiter must be different + if (rowDelimiter != -1 && fieldDelimiter == rowDelimiter) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Field delimiter and row delimiter must be different"); + } + // When using auto-detect row delimiters, field delimiter can't be CR or LF + if (rowDelimiter == -1 && (fieldDelimiter == '\n' || fieldDelimiter == '\r')) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Field delimiter conflicts with auto-detected row delimiter (CR/LF)"); + } + // Quote character and row delimiter must be different + if (quoteChar != -1 && rowDelimiter != -1 && quoteChar == rowDelimiter) { + throw new XPathException(expression, ErrorCodes.FOCV0003, + "Quote character and row delimiter must be different"); + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/DeepEqualOptions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/DeepEqualOptions.java new file mode 100644 index 00000000000..17e1bcdcd57 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/DeepEqualOptions.java @@ -0,0 +1,962 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import io.lacuna.bifurcan.IEntry; +import org.exist.Namespaces; +import org.exist.dom.memtree.NodeImpl; +import org.exist.dom.memtree.ReferenceNode; +import org.exist.xquery.Constants; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.InlineFunction; +import org.exist.xquery.ValueComparison; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +import javax.annotation.Nullable; +import java.text.Normalizer; +import java.util.*; + +/** + * XQuery 4.0 deep-equal options and options-aware comparison engine. + * + * Holds the parsed option flags from the options map parameter and provides + * comparison methods that respect those options. 
+ */ +public class DeepEqualOptions { + + // Valid boolean option keys (no namespace) + private static final Set VALID_BOOLEAN_OPTIONS = Set.of( + "base-uri", "comments", "debug", + "id-property", "idrefs-property", + "in-scope-namespaces", "namespace-prefixes", "nilled-property", + "processing-instructions", "timezones", "type-annotations", + "type-variety", "typed-values" + ); + + // Valid string-valued option keys + private static final Set VALID_STRING_OPTIONS = Set.of( + "collation", "whitespace" + ); + + // Valid boolean-valued option keys (not in VALID_BOOLEAN_OPTIONS) + private static final Set VALID_ORDERED_OPTIONS = Set.of( + "ordered", "map-order" + ); + + // All valid string keys (no namespace) + private static final Set ALL_VALID_KEYS; + static { + final Set keys = new HashSet<>(); + keys.addAll(VALID_BOOLEAN_OPTIONS); + keys.addAll(VALID_STRING_OPTIONS); + keys.addAll(VALID_ORDERED_OPTIONS); + ALL_VALID_KEYS = Collections.unmodifiableSet(keys); + } + + // Option flags (defaults per XQ4 spec) + public final boolean comments; // default: false + public final boolean processingInstructions; // default: false + public final boolean ordered; // default: true + public final boolean namespacePrefixes; // default: false + public final boolean inScopeNamespaces; // default: false + public final boolean baseUri; // default: false + public final boolean idProperty; // default: false + public final boolean idrefsProperty; // default: false + public final boolean nilledProperty; // default: false + public final boolean timezones; // default: true + public final boolean typeAnnotations; // default: false + public final boolean typeVariety; // default: false + public final boolean typedValues; // default: true + public final boolean debug; // default: false + public final boolean mapOrder; // default: false + public final boolean unorderedElements; // from 'ordered' key on element comparison + + public enum WhitespaceMode { PRESERVE, NORMALIZE, STRIP } + public 
final WhitespaceMode whitespace; // default: PRESERVE + + @Nullable + public final Collator collator; + + /** Default options (XQ3.1 compatible behavior). */ + public static final DeepEqualOptions DEFAULTS = new DeepEqualOptions( + false, false, true, false, false, false, + false, false, false, true, false, false, true, + false, false, WhitespaceMode.PRESERVE, null + ); + + private DeepEqualOptions( + boolean comments, boolean processingInstructions, boolean ordered, + boolean namespacePrefixes, boolean inScopeNamespaces, boolean baseUri, + boolean idProperty, boolean idrefsProperty, boolean nilledProperty, + boolean timezones, boolean typeAnnotations, boolean typeVariety, + boolean typedValues, boolean debug, boolean mapOrder, + WhitespaceMode whitespace, @Nullable Collator collator) { + this.comments = comments; + this.processingInstructions = processingInstructions; + this.ordered = ordered; + this.namespacePrefixes = namespacePrefixes; + this.inScopeNamespaces = inScopeNamespaces; + this.baseUri = baseUri; + this.idProperty = idProperty; + this.idrefsProperty = idrefsProperty; + this.nilledProperty = nilledProperty; + this.timezones = timezones; + this.typeAnnotations = typeAnnotations; + this.typeVariety = typeVariety; + this.typedValues = typedValues; + this.debug = debug; + this.mapOrder = mapOrder; + this.unorderedElements = !ordered; + this.whitespace = whitespace; + this.collator = collator; + } + + /** + * Parse an XQ4 options map into a DeepEqualOptions instance. + * Validates all option keys and values per the spec. 
+ * + * @param options the options map + * @param context the XQuery context (for collation resolution) + * @return parsed options + * @throws XPathException XPTY0004 if any option key or value is invalid + */ + public static DeepEqualOptions parse(final AbstractMapType options, final XQueryContext context) throws XPathException { + boolean comments = false; + boolean processingInstructions = false; + boolean ordered = true; + boolean namespacePrefixes = false; + boolean inScopeNamespaces = false; + boolean baseUri = false; + boolean idProperty = false; + boolean idrefsProperty = false; + boolean nilledProperty = false; + boolean timezones = true; + boolean typeAnnotations = false; + boolean typeVariety = false; + boolean typedValues = true; + boolean debug = false; + boolean mapOrder = false; + WhitespaceMode whitespace = WhitespaceMode.PRESERVE; + Collator collator = context.getDefaultCollator(); + + for (final IEntry entry : options) { + final AtomicValue key = entry.key(); + + // Keys that are QNames in a namespace are ignored (vendor extensions) + if (key.getType() == Type.QNAME) { + final QNameValue qnv = (QNameValue) key; + final String ns = qnv.getQName().getNamespaceURI(); + if (ns != null && !ns.isEmpty()) { + continue; // Ignore vendor extension options + } + // QName in no namespace → error + throw new XPathException(ErrorCodes.XPTY0004, + "Option key in no namespace is not recognized: " + key.getStringValue()); + } + + final String keyStr = key.getStringValue(); + + // Validate that the key is known + if (!ALL_VALID_KEYS.contains(keyStr)) { + throw new XPathException(ErrorCodes.XPTY0004, + "Unknown deep-equal option: '" + keyStr + "'"); + } + + final Sequence value = entry.value(); + + if (VALID_BOOLEAN_OPTIONS.contains(keyStr)) { + final boolean boolVal = parseBooleanOption(keyStr, value); + switch (keyStr) { + case "comments" -> comments = boolVal; + case "processing-instructions" -> processingInstructions = boolVal; + case "namespace-prefixes" -> 
namespacePrefixes = boolVal; + case "in-scope-namespaces" -> inScopeNamespaces = boolVal; + case "base-uri" -> baseUri = boolVal; + case "id-property" -> idProperty = boolVal; + case "idrefs-property" -> idrefsProperty = boolVal; + case "nilled-property" -> nilledProperty = boolVal; + case "timezones" -> timezones = boolVal; + case "type-annotations" -> typeAnnotations = boolVal; + case "type-variety" -> typeVariety = boolVal; + case "typed-values" -> typedValues = boolVal; + case "debug" -> debug = boolVal; + } + } else if (VALID_ORDERED_OPTIONS.contains(keyStr)) { + final boolean boolVal = parseBooleanOption(keyStr, value); + switch (keyStr) { + case "ordered" -> ordered = boolVal; + case "map-order" -> mapOrder = boolVal; + } + } else if ("collation".equals(keyStr)) { + if (!value.isEmpty()) { + collator = context.getCollator(value.getStringValue()); + } + } else if ("whitespace".equals(keyStr)) { + if (!value.isEmpty()) { + final String wsVal = value.getStringValue(); + whitespace = switch (wsVal) { + case "preserve" -> WhitespaceMode.PRESERVE; + case "normalize" -> WhitespaceMode.NORMALIZE; + case "strip" -> WhitespaceMode.STRIP; + default -> throw new XPathException(ErrorCodes.XPTY0004, + "Invalid whitespace option value: '" + wsVal + "'"); + }; + } + } + } + + return new DeepEqualOptions( + comments, processingInstructions, ordered, + namespacePrefixes, inScopeNamespaces, baseUri, + idProperty, idrefsProperty, nilledProperty, + timezones, typeAnnotations, typeVariety, typedValues, + debug, mapOrder, whitespace, collator + ); + } + + /** + * Parse a boolean option value using XQ4 option parameter conventions. + * Accepts: xs:boolean, xs:string ("true"/"false"/"0"/"1"), + * xs:integer (0/1), or nodes (effective boolean value). 
+ */ + private static boolean parseBooleanOption(final String key, final Sequence value) throws XPathException { + if (value.isEmpty()) { + return false; + } + + final Item item = value.itemAt(0); + + // If it's already a boolean, use it directly + if (item.getType() == Type.BOOLEAN) { + return ((BooleanValue) item).getValue(); + } + + // Try casting to xs:boolean — accepts "true"/"false"/"0"/"1" and numeric 0/1 + try { + final AtomicValue boolVal = item.atomize().convertTo(Type.BOOLEAN); + return ((BooleanValue) boolVal).getValue(); + } catch (final XPathException e) { + throw new XPathException(ErrorCodes.XPTY0004, + "Invalid value for boolean option '" + key + "': " + item.getStringValue()); + } + } + + // ======================================================================== + // Options-aware deep comparison engine + // ======================================================================== + + /** + * Deep-compare two sequences with options. + */ + public int deepCompareSeq(final Sequence sequence1, final Sequence sequence2) { + if (sequence1 == sequence2) { + return Constants.EQUAL; + } + + if (!ordered) { + return deepCompareSeqUnordered(sequence1, sequence2); + } + + final int count1 = sequence1.getItemCount(); + final int count2 = sequence2.getItemCount(); + if (count1 != count2) { + return count1 < count2 ? Constants.INFERIOR : Constants.SUPERIOR; + } + + for (int i = 0; i < count1; i++) { + final int cmp = deepCompare(sequence1.itemAt(i), sequence2.itemAt(i)); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + return Constants.EQUAL; + } + + /** + * Unordered sequence comparison: every item in seq1 must match some + * item in seq2 (and vice versa, by equal counts of matches). + */ + private int deepCompareSeqUnordered(final Sequence sequence1, final Sequence sequence2) { + final int count1 = sequence1.getItemCount(); + final int count2 = sequence2.getItemCount(); + if (count1 != count2) { + return count1 < count2 ? 
Constants.INFERIOR : Constants.SUPERIOR; + } + + // For each item in seq1, find a matching item in seq2 + final boolean[] matched = new boolean[count2]; + for (int i = 0; i < count1; i++) { + final Item item1 = sequence1.itemAt(i); + boolean found = false; + for (int j = 0; j < count2; j++) { + if (!matched[j] && deepCompare(item1, sequence2.itemAt(j)) == Constants.EQUAL) { + matched[j] = true; + found = true; + break; + } + } + if (!found) { + return Constants.INFERIOR; + } + } + return Constants.EQUAL; + } + + /** + * Deep-compare two items with options. + */ + public int deepCompare(final Item item1, final Item item2) { + if (item1 == item2) { + return Constants.EQUAL; + } + + try { + // Array comparison + if (item1.getType() == Type.ARRAY_ITEM || item2.getType() == Type.ARRAY_ITEM) { + if (item1.getType() != item2.getType()) { + return Constants.INFERIOR; + } + final ArrayType array1 = (ArrayType) item1; + final ArrayType array2 = (ArrayType) item2; + if (array1.getSize() != array2.getSize()) { + return array1.getSize() < array2.getSize() ? 
Constants.INFERIOR : Constants.SUPERIOR; + } + for (int i = 0; i < array1.getSize(); i++) { + final int cmp = deepCompareSeq(array1.get(i), array2.get(i)); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + return Constants.EQUAL; + } + + // Map comparison + if (item1.getType() == Type.MAP_ITEM || item2.getType() == Type.MAP_ITEM) { + if (item1.getType() != item2.getType()) { + return Constants.INFERIOR; + } + return compareMaps((AbstractMapType) item1, (AbstractMapType) item2); + } + + // Function items: identity comparison via function-identity semantics + if (Type.subTypeOf(item1.getType(), Type.FUNCTION) || Type.subTypeOf(item2.getType(), Type.FUNCTION)) { + if (!Type.subTypeOf(item1.getType(), Type.FUNCTION) || !Type.subTypeOf(item2.getType(), Type.FUNCTION)) { + return Constants.INFERIOR; + } + return compareFunctionItems(item1, item2); + } + + // Atomic values + final boolean item1IsAtomic = Type.subTypeOf(item1.getType(), Type.ANY_ATOMIC_TYPE); + final boolean item2IsAtomic = Type.subTypeOf(item2.getType(), Type.ANY_ATOMIC_TYPE); + if (item1IsAtomic || item2IsAtomic) { + if (!item1IsAtomic || !item2IsAtomic) { + return item1IsAtomic ? Constants.INFERIOR : Constants.SUPERIOR; + } + return compareAtomics((AtomicValue) item1, (AtomicValue) item2); + } + + // Node comparison + if (item1.getType() != item2.getType()) { + return Constants.INFERIOR; + } + + final NodeValue nva = (NodeValue) item1; + final NodeValue nvb = (NodeValue) item2; + if (nva == nvb) { + return Constants.EQUAL; + } + + switch (item1.getType()) { + case Type.DOCUMENT: + return compareContents( + nva instanceof org.w3c.dom.Node n1 ? n1 : ((org.exist.dom.persistent.NodeProxy) nva).getOwnerDocument(), + nvb instanceof org.w3c.dom.Node n2 ? 
n2 : ((org.exist.dom.persistent.NodeProxy) nvb).getOwnerDocument()); + + case Type.ELEMENT: + return compareElements(nva.getNode(), nvb.getNode()); + + case Type.ATTRIBUTE: + final int attrNameCmp = compareNames(nva.getNode(), nvb.getNode()); + if (attrNameCmp != Constants.EQUAL) { + return attrNameCmp; + } + // whitespace:normalize applies to attribute values, but strip does NOT + return safeCompare( + maybeNormalizeWSAttr(nva.getNode().getNodeValue()), + maybeNormalizeWSAttr(nvb.getNode().getNodeValue()), + collator); + + case Type.PROCESSING_INSTRUCTION: + return comparePIs(nva, nvb); + + case Type.NAMESPACE: + final int nsNameCmp = safeCompare(nva.getNode().getNodeName(), nvb.getNode().getNodeName(), null); + if (nsNameCmp != Constants.EQUAL) { + return nsNameCmp; + } + return safeCompare(nva.getStringValue(), nvb.getStringValue(), collator); + + case Type.TEXT: + return safeCompare( + maybeNormalizeWS(nva.getStringValue()), + maybeNormalizeWS(nvb.getStringValue()), + collator); + + case Type.COMMENT: + // Apply whitespace normalization to comment content if whitespace option is set + return safeCompare( + maybeNormalizeWS(nva.getStringValue()), + maybeNormalizeWS(nvb.getStringValue()), + collator); + + default: + return Constants.INFERIOR; + } + } catch (final XPathException e) { + return Constants.INFERIOR; + } + } + + /** + * Compare function items using function-identity semantics (XQ4). + * Named functions with same name and arity are equal. + * Anonymous functions use reference identity. 
+ */ + private static int compareFunctionItems(final Item item1, final Item item2) { + if (item1 == item2) { + return Constants.EQUAL; + } + if (item1 instanceof FunctionReference ref1 && item2 instanceof FunctionReference ref2) { + final FunctionSignature sig1 = ref1.getSignature(); + final FunctionSignature sig2 = ref2.getSignature(); + final org.exist.dom.QName name1 = sig1.getName(); + final org.exist.dom.QName name2 = sig2.getName(); + // Both must be named functions (not inline/anonymous) + if (name1 != null && name2 != null + && name1 != InlineFunction.INLINE_FUNCTION_QNAME + && name2 != InlineFunction.INLINE_FUNCTION_QNAME) { + if (name1.equals(name2) && sig1.getArgumentCount() == sig2.getArgumentCount()) { + return Constants.EQUAL; + } + } + } + return Constants.INFERIOR; + } + + private int compareMaps(final AbstractMapType map1, final AbstractMapType map2) { + if (map1.size() != map2.size()) { + return map1.size() < map2.size() ? Constants.INFERIOR : Constants.SUPERIOR; + } + + for (final IEntry entry1 : map1) { + if (!map2.contains(entry1.key())) { + return Constants.SUPERIOR; + } + final int cmp = deepCompareSeq(entry1.value(), map2.get(entry1.key())); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + return Constants.EQUAL; + } + + private int compareAtomics(final AtomicValue av, final AtomicValue bv) { + try { + // Whitespace normalization for string-like atomics + if (whitespace != WhitespaceMode.PRESERVE) { + if (isStringLike(av) && isStringLike(bv)) { + final String a = applyWhitespace(av.getStringValue()); + final String b = applyWhitespace(bv.getStringValue()); + if (collator != null) { + return collator.compare(a, b); + } + return a.compareTo(b); + } + } + + if (Type.subTypeOfUnion(av.getType(), Type.NUMERIC) && + Type.subTypeOfUnion(bv.getType(), Type.NUMERIC)) { + if (((NumericValue) av).isNaN() && ((NumericValue) bv).isNaN()) { + return Constants.EQUAL; + } + } + return ValueComparison.compareAtomic(collator, av, bv); + } catch (final 
XPathException e) { + return Constants.INFERIOR; + } + } + + private static boolean isStringLike(final AtomicValue v) { + return Type.subTypeOf(v.getType(), Type.STRING) || + v.getType() == Type.UNTYPED_ATOMIC || + v.getType() == Type.ANY_URI; + } + + private int compareElements(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + int cmp = compareNames(a, b); + if (cmp != Constants.EQUAL) { + return cmp; + } + + // Compare namespace prefixes if option is set + if (namespacePrefixes) { + cmp = safeCompare(a.getPrefix(), b.getPrefix(), null); + if (cmp != Constants.EQUAL) { + return cmp; + } + } + + cmp = compareAttributes(a, b); + if (cmp != Constants.EQUAL) { + return cmp; + } + + if (unorderedElements) { + return compareContentsUnordered(a, b); + } + + return compareContents(a, b); + } + + private int comparePIs(final NodeValue nva, final NodeValue nvb) throws XPathException { + final int nameCmp = safeCompare(nva.getNode().getNodeName(), nvb.getNode().getNodeName(), null); + if (nameCmp != Constants.EQUAL) { + return nameCmp; + } + // Apply whitespace normalization to PI data content + return safeCompare( + maybeNormalizeWS(nva.getStringValue()), + maybeNormalizeWS(nvb.getStringValue()), + collator); + } + + private int compareContents(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + final List childrenA = getSignificantChildren(a); + final List childrenB = getSignificantChildren(b); + + // Merge adjacent text nodes + final List mergedA = mergeAdjacentTextNodes(childrenA); + final List mergedB = mergeAdjacentTextNodes(childrenB); + + if (mergedA.size() != mergedB.size()) { + return mergedA.size() < mergedB.size() ? 
Constants.INFERIOR : Constants.SUPERIOR; + } + + for (int i = 0; i < mergedA.size(); i++) { + final Object itemA = mergedA.get(i); + final Object itemB = mergedB.get(i); + + if (itemA instanceof String sa && itemB instanceof String sb) { + // Text may already be normalized/stripped by addMergedText; apply WS normalization if PRESERVE mode + final String normA = whitespace == WhitespaceMode.PRESERVE ? sa : maybeNormalizeWS(sa); + final String normB = whitespace == WhitespaceMode.PRESERVE ? sb : maybeNormalizeWS(sb); + final int cmp = safeCompare(normA, normB, collator); + if (cmp != Constants.EQUAL) { + return cmp; + } + } else if (itemA instanceof org.w3c.dom.Node na && itemB instanceof org.w3c.dom.Node nb) { + final int typeA = getEffectiveNodeType(na); + final int typeB = getEffectiveNodeType(nb); + if (typeA != typeB) { + return Constants.INFERIOR; + } + final int cmp; + switch (typeA) { + case org.w3c.dom.Node.ELEMENT_NODE: + cmp = compareElements(na, nb); + break; + case org.w3c.dom.Node.COMMENT_NODE: + cmp = safeCompare(maybeNormalizeWS(na.getNodeValue()), + maybeNormalizeWS(nb.getNodeValue()), collator); + break; + case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: + final int piNameCmp = safeCompare(na.getNodeName(), nb.getNodeName(), null); + cmp = piNameCmp != Constants.EQUAL ? piNameCmp : + safeCompare(maybeNormalizeWS(na.getNodeValue()), + maybeNormalizeWS(nb.getNodeValue()), collator); + break; + default: + cmp = Constants.INFERIOR; + } + if (cmp != Constants.EQUAL) { + return cmp; + } + } else { + // Mismatched types (text vs node) + return Constants.INFERIOR; + } + } + return Constants.EQUAL; + } + + /** + * Unordered element comparison: child elements are compared as multisets. 
+ */ + private int compareContentsUnordered(final org.w3c.dom.Node a, final org.w3c.dom.Node b) { + final List childrenA = getSignificantChildren(a); + final List childrenB = getSignificantChildren(b); + + // Separate text content and element/other nodes + final StringBuilder textA = new StringBuilder(); + final List elementsA = new ArrayList<>(); + for (final org.w3c.dom.Node n : childrenA) { + final int type = getEffectiveNodeType(n); + if (type == org.w3c.dom.Node.TEXT_NODE) { + textA.append(getNodeValue(n)); + } else { + elementsA.add(n); + } + } + + final StringBuilder textB = new StringBuilder(); + final List elementsB = new ArrayList<>(); + for (final org.w3c.dom.Node n : childrenB) { + final int type = getEffectiveNodeType(n); + if (type == org.w3c.dom.Node.TEXT_NODE) { + textB.append(getNodeValue(n)); + } else { + elementsB.add(n); + } + } + + // Compare concatenated text content + final int textCmp = safeCompare( + maybeNormalizeWS(textA.toString()), + maybeNormalizeWS(textB.toString()), + collator); + if (textCmp != Constants.EQUAL) { + return textCmp; + } + + // Compare elements as multisets + if (elementsA.size() != elementsB.size()) { + return elementsA.size() < elementsB.size() ? 
Constants.INFERIOR : Constants.SUPERIOR; + } + + final boolean[] matched = new boolean[elementsB.size()]; + for (final org.w3c.dom.Node na : elementsA) { + boolean found = false; + for (int j = 0; j < elementsB.size(); j++) { + if (!matched[j]) { + final int typeA = getEffectiveNodeType(na); + final int typeB = getEffectiveNodeType(elementsB.get(j)); + if (typeA == typeB) { + int cmp; + if (typeA == org.w3c.dom.Node.ELEMENT_NODE) { + cmp = compareElements(na, elementsB.get(j)); + } else if (typeA == org.w3c.dom.Node.COMMENT_NODE) { + cmp = safeCompare(na.getNodeValue(), elementsB.get(j).getNodeValue(), collator); + } else if (typeA == org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE) { + cmp = safeCompare(na.getNodeName(), elementsB.get(j).getNodeName(), null); + if (cmp == Constants.EQUAL) { + cmp = safeCompare(na.getNodeValue(), elementsB.get(j).getNodeValue(), collator); + } + } else { + cmp = Constants.INFERIOR; + } + if (cmp == Constants.EQUAL) { + matched[j] = true; + found = true; + break; + } + } + } + } + if (!found) { + return Constants.INFERIOR; + } + } + return Constants.EQUAL; + } + + /** + * Get child nodes that are significant for deep-equal comparison, + * based on the current options. 
+ */ + private List getSignificantChildren(final org.w3c.dom.Node parent) { + final List result = new ArrayList<>(); + final boolean preserveWS = isXmlSpacePreserve(parent); + org.w3c.dom.Node child = parent.getFirstChild(); + while (child != null) { + final int type = getEffectiveNodeType(child); + switch (type) { + case org.w3c.dom.Node.ELEMENT_NODE: + result.add(child); + break; + case org.w3c.dom.Node.TEXT_NODE: + if (whitespace == WhitespaceMode.STRIP) { + // Strip whitespace-only text nodes (deep-equal strip option + // overrides xml:space="preserve" per XQ4 spec) + final String value = getNodeValue(child); + if (value != null && !value.trim().isEmpty()) { + result.add(child); + } + } else { + result.add(child); + } + break; + case org.w3c.dom.Node.COMMENT_NODE: + if (comments) { + result.add(child); + } + break; + case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: + if (processingInstructions) { + result.add(child); + } + break; + } + child = child.getNextSibling(); + } + return result; + } + + /** + * Merge adjacent text nodes into single String entries. + * Non-text nodes are kept as-is. This handles the case where + * comments/PIs split text differently in two trees. + */ + private List mergeAdjacentTextNodes(final List nodes) { + final List result = new ArrayList<>(); + StringBuilder currentText = null; + + for (final org.w3c.dom.Node node : nodes) { + final int type = getEffectiveNodeType(node); + if (type == org.w3c.dom.Node.TEXT_NODE) { + if (currentText == null) { + currentText = new StringBuilder(); + } + currentText.append(getNodeValue(node)); + } else { + if (currentText != null) { + addMergedText(result, currentText.toString()); + currentText = null; + } + result.add(node); + } + } + if (currentText != null) { + addMergedText(result, currentText.toString()); + } + return result; + } + + /** + * Add merged text to the result list, applying whitespace rules. + * In STRIP mode, whitespace-only text is dropped. 
+     * In NORMALIZE mode, text that normalizes to empty is dropped.
+     * In any other mode the text is added unchanged.
+     *
+     * @param result the list that receives the text
+     * @param text the text to add
+     */
+    private void addMergedText(final List<Object> result, final String text) {
+        if (whitespace == WhitespaceMode.STRIP) {
+            if (!text.trim().isEmpty()) {
+                result.add(text);
+            }
+        } else if (whitespace == WhitespaceMode.NORMALIZE) {
+            final String normalized = normalizeWhitespace(text);
+            if (!normalized.isEmpty()) {
+                result.add(normalized);
+            }
+        } else {
+            result.add(text);
+        }
+    }
+
+    /**
+     * Compare the attributes of two nodes, leaving namespace declarations
+     * (attributes in the xmlns namespace) out of both the count and the comparison.
+     *
+     * @param a the first node
+     * @param b the second node
+     * @return {@code Constants.EQUAL} if both nodes carry the same number of
+     *         attributes and every attribute of {@code a} has a same-named
+     *         attribute in {@code b} with an equal value; otherwise a
+     *         non-equal comparison result
+     */
+    private int compareAttributes(final org.w3c.dom.Node a, final org.w3c.dom.Node b) {
+        final org.w3c.dom.NamedNodeMap nnma = a.getAttributes();
+        final org.w3c.dom.NamedNodeMap nnmb = b.getAttributes();
+
+        final int aCount = getAttrCount(nnma);
+        final int bCount = getAttrCount(nnmb);
+
+        if (aCount != bCount) {
+            return aCount < bCount ? Constants.INFERIOR : Constants.SUPERIOR;
+        }
+
+        for (int i = 0; i < nnma.getLength(); i++) {
+            final org.w3c.dom.Node ta = nnma.item(i);
+            final String nsA = ta.getNamespaceURI();
+            // equals(null) is false, so no separate null check is needed
+            if (Namespaces.XMLNS_NS.equals(nsA)) {
+                continue;
+            }
+            // attributes without a local name can only be looked up by node name
+            final org.w3c.dom.Node tb = ta.getLocalName() == null
+                    ? nnmb.getNamedItem(ta.getNodeName())
+                    : nnmb.getNamedItemNS(nsA, ta.getLocalName());
+            if (tb == null) {
+                return Constants.SUPERIOR;
+            }
+            final int cmp = safeCompare(
+                    maybeNormalizeWSAttr(ta.getNodeValue()),
+                    maybeNormalizeWSAttr(tb.getNodeValue()),
+                    collator);
+            if (cmp != Constants.EQUAL) {
+                return cmp;
+            }
+        }
+        return Constants.EQUAL;
+    }
+
+    // ========================================================================
+    // Utility methods
+    // ========================================================================
+
+    /**
+     * Normalize whitespace of text content unless the mode is PRESERVE.
+     *
+     * @param s the text, may be null
+     * @return {@code s} itself if it is null or the mode is PRESERVE,
+     *         otherwise the whitespace-normalized text
+     */
+    private String maybeNormalizeWS(@Nullable final String s) {
+        if (s == null || whitespace == WhitespaceMode.PRESERVE) {
+            return s;
+        }
+        // Both NORMALIZE and STRIP normalize text content
+        return normalizeWhitespace(s);
+    }
+
+    /**
+     * Normalize whitespace for attribute values: only NORMALIZE mode applies,
+     * STRIP mode does NOT affect attribute values.
+     *
+     * @param s the attribute value, may be null
+     * @return {@code s} itself if it is null or the mode is not NORMALIZE,
+     *         otherwise the whitespace-normalized value
+     */
+    private String maybeNormalizeWSAttr(@Nullable final String s) {
+        if (s == null || whitespace != WhitespaceMode.NORMALIZE) {
+            return s;
+        }
+        return normalizeWhitespace(s);
+    }
+
+    /**
+     * One or more XML whitespace characters (space, tab, CR, LF). Compiled once
+     * instead of on every call.
+     */
+    private static final java.util.regex.Pattern XML_WHITESPACE_RUN =
+            java.util.regex.Pattern.compile("[ \\t\\r\\n]+");
+
+    /**
+     * Normalize whitespace the way fn:normalize-space does: every run of XML
+     * whitespace (space, tab, CR, LF) becomes a single space, and leading and
+     * trailing whitespace is removed. Other characters that Java considers
+     * whitespace (e.g. U+2003 EM SPACE) are content and are left alone.
+     *
+     * @param s the text to normalize, must not be null
+     * @return the normalized text
+     */
+    private static String normalizeWhitespace(final String s) {
+        final String collapsed = XML_WHITESPACE_RUN.matcher(s).replaceAll(" ");
+        // after collapsing, at most one space is left at either end
+        final int start = collapsed.startsWith(" ") ? 1 : 0;
+        final int end = collapsed.endsWith(" ") ? collapsed.length() - 1 : collapsed.length();
+        // start > end only happens for the single-space string
+        return start >= end ? "" : collapsed.substring(start, end);
+    }
+
+    /**
+     * Apply the current whitespace mode to a string.
+     *
+     * @param s the text, must not be null in NORMALIZE or STRIP mode
+     * @return the whitespace-normalized text in NORMALIZE and STRIP mode,
+     *         {@code s} unchanged in every other mode
+     */
+    private String applyWhitespace(final String s) {
+        if (whitespace == WhitespaceMode.NORMALIZE || whitespace == WhitespaceMode.STRIP) {
+            return normalizeWhitespace(s);
+        }
+        return s;
+    }
+
+    /**
+     * Count the attributes in a map, not counting namespace declarations.
+     *
+     * @param nnm the attribute map
+     * @return the number of attributes whose namespace is not the xmlns namespace
+     */
+    private static int getAttrCount(final org.w3c.dom.NamedNodeMap nnm) {
+        int count = 0;
+        for (int i = 0; i < nnm.getLength(); i++) {
+            // a null namespace never equals XMLNS_NS, so it is counted
+            if (!Namespaces.XMLNS_NS.equals(nnm.item(i).getNamespaceURI())) {
+                ++count;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Compare the names of two nodes. If either node has a local name the
+     * comparison is by namespace URI first and local name second; otherwise
+     * the node names are compared.
+     *
+     * @param a the first node
+     * @param b the second node
+     * @return the result of the first comparison that is not equal,
+     *         or {@code Constants.EQUAL}
+     */
+    private static int compareNames(final org.w3c.dom.Node a, final org.w3c.dom.Node b) {
+        if (a.getLocalName() != null || b.getLocalName() != null) {
+            final int nsComparison = safeCompare(a.getNamespaceURI(), b.getNamespaceURI(), null);
+            if (nsComparison != Constants.EQUAL) {
+                return nsComparison;
+            }
+            return safeCompare(a.getLocalName(), b.getLocalName(), null);
+        }
+        return safeCompare(a.getNodeName(), b.getNodeName(), null);
+    }
+
+    /**
+     * Null-tolerant string comparison.
+     *
+     * @param a the first string, may be null
+     * @param b the second string, may be null
+     * @param collator the collator to compare with, or null to use {@link String#compareTo(String)}
+     * @return {@code Constants.EQUAL} if both are the same reference (including
+     *         both null), {@code Constants.INFERIOR} if only {@code a} is null,
+     *         {@code Constants.SUPERIOR} if only {@code b} is null, otherwise
+     *         the value returned by the collator or by {@code compareTo}
+     */
+    private static int safeCompare(@Nullable final String a, @Nullable final String b, @Nullable final Collator collator) {
+        // identity check on purpose: cheap fast path that also covers "both null"
+        if (a == b) {
+            return Constants.EQUAL;
+        }
+        if (a == null) {
+            return Constants.INFERIOR;
+        }
+        if (b == null) {
+            return Constants.SUPERIOR;
+        }
+        if (collator != null) {
+            return collator.compare(a, b);
+        }
+        return a.compareTo(b);
+    }
+
+    /**
+     * Get the value of a node, following a reference node to the node it refers to.
+     *
+     * @param n the node
+     * @return the node value
+     */
+    private static String getNodeValue(final org.w3c.dom.Node n) {
+        if (n.getNodeType() == NodeImpl.REFERENCE_NODE) {
+            return ((ReferenceNode) n).getReference().getNodeValue();
+        }
+        return n.getNodeValue();
+    }
+
+    /**
+     * Get the type of a node, reporting the type of the referenced node for a reference node.
+     *
+     * @param n the node
+     * @return the node type
+     */
+    private static int getEffectiveNodeType(final org.w3c.dom.Node n) {
+        int nodeType = n.getNodeType();
+        if (nodeType == NodeImpl.REFERENCE_NODE) {
+            nodeType = ((ReferenceNode) n).getReference().getNode().getNodeType();
+        }
+        return nodeType;
+    }
+
+    /**
+     * Check if the given node or any ancestor has xml:space="preserve".
+     * Uses NamedNodeMap lookup for broader DOM compatibility.
+     * The walk starts at the node itself and stops at the first non-element;
+     * the nearest xml:space attribute with the value "preserve" or "default" decides.
+     *
+     * @param node the node to start from
+     * @return true if the nearest deciding xml:space attribute is "preserve",
+     *         false if it is "default" or if there is none
+     */
+    private static boolean isXmlSpacePreserve(final org.w3c.dom.Node node) {
+        final String xmlNamespace = "http://www.w3.org/XML/1998/namespace";
+        for (org.w3c.dom.Node current = node;
+                current != null && current.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE;
+                current = current.getParentNode()) {
+
+            // First try: Element.getAttributeNS, for persistent DOM and other implementations
+            if (current instanceof org.w3c.dom.Element elem) {
+                final String viaElement = elem.getAttributeNS(xmlNamespace, "space");
+                if ("preserve".equals(viaElement)) {
+                    return true;
+                }
+                if ("default".equals(viaElement)) {
+                    return false;
+                }
+            }
+
+            // Second try: the attribute map, by namespace first and by qualified name second
+            final org.w3c.dom.NamedNodeMap attrs = current.getAttributes();
+            if (attrs != null) {
+                final org.w3c.dom.Node byNamespace = attrs.getNamedItemNS(xmlNamespace, "space");
+                final org.w3c.dom.Node spaceAttr =
+                        byNamespace != null ? byNamespace : attrs.getNamedItem("xml:space");
+                if (spaceAttr != null) {
+                    final String viaMap = spaceAttr.getNodeValue();
+                    if ("preserve".equals(viaMap)) {
+                        return true;
+                    }
+                    if ("default".equals(viaMap)) {
+                        return false;
+                    }
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Test two sequences for deep equality using these options.
+     *
+     * @param sequence1 the first sequence
+     * @param sequence2 the second sequence
+     * @return true if {@code deepCompareSeq} reports the sequences as equal
+     */
+    public boolean deepEqualsSeq(final Sequence sequence1, final Sequence sequence2) {
+        final int outcome = deepCompareSeq(sequence1, sequence2);
+        return outcome == Constants.EQUAL;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAllEqualDifferent.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAllEqualDifferent.java
new file mode 100644
index 00000000000..c8825265991
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAllEqualDifferent.java
@@ -0,0 +1,165 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:all-equal and fn:all-different (XQuery 4.0). 
+ *
+ * Both functions atomize their input and compare the values pairwise with
+ * {@link #contextuallyEqual(AtomicValue, AtomicValue, Collator)}.
+ */
+public class FnAllEqualDifferent extends BasicFunction {
+
+    public static final FunctionSignature[] FN_ALL_EQUAL = {
+        new FunctionSignature(
+            new QName("all-equal", Function.BUILTIN_FUNCTION_NS),
+            "Returns true if all items in the supplied sequence are equal.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare")
+            },
+            new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are equal")),
+        new FunctionSignature(
+            new QName("all-equal", Function.BUILTIN_FUNCTION_NS),
+            "Returns true if all items in the supplied sequence are equal, using the specified collation.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare"),
+                new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI")
+            },
+            new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are equal"))
+    };
+
+    public static final FunctionSignature[] FN_ALL_DIFFERENT = {
+        new FunctionSignature(
+            new QName("all-different", Function.BUILTIN_FUNCTION_NS),
+            "Returns true if no two items in the supplied sequence are equal.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare")
+            },
+            new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are different")),
+        new FunctionSignature(
+            new QName("all-different", Function.BUILTIN_FUNCTION_NS),
+            "Returns true if no two items in the supplied sequence are equal, using the specified collation.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The values to compare"),
+                new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI")
+            },
+            new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all values are different"))
+    };
+
+    public FnAllEqualDifferent(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Evaluate fn:all-equal or fn:all-different, depending on the name the
+     * function was called as.
+     *
+     * @param args the values to compare and, optionally, the collation URI
+     * @param contextSequence not used
+     * @return {@code BooleanValue.TRUE} or {@code BooleanValue.FALSE}
+     * @throws XPathException if the collation cannot be resolved or an item cannot be atomized
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Sequence values = args[0];
+        // with zero or one item both functions are trivially true
+        if (values.getItemCount() <= 1) {
+            return BooleanValue.TRUE;
+        }
+
+        final Collator collator = getCollator(args);
+
+        // Collect all atomized values
+        final java.util.List<AtomicValue> items = new java.util.ArrayList<>(values.getItemCount());
+        for (final SequenceIterator i = values.iterate(); i.hasNext(); ) {
+            items.add(i.nextItem().atomize());
+        }
+
+        return isCalledAs("all-equal") ? allEqual(items, collator) : allDifferent(items, collator);
+    }
+
+    /**
+     * Check that every item is contextually equal to the first one.
+     *
+     * @param items the atomized values, at least one
+     * @param collator the collator for string comparison
+     * @return {@code BooleanValue.FALSE} as soon as one item differs from the first
+     */
+    private Sequence allEqual(final java.util.List<AtomicValue> items, final Collator collator) throws XPathException {
+        // all-equal iff count(distinct-values) <= 1, using contextual equality
+        final AtomicValue first = items.get(0);
+        for (int i = 1; i < items.size(); i++) {
+            if (!contextuallyEqual(first, items.get(i), collator)) {
+                return BooleanValue.FALSE;
+            }
+        }
+        return BooleanValue.TRUE;
+    }
+
+    /**
+     * Check that no two items are contextually equal, comparing every pair.
+     *
+     * @param items the atomized values
+     * @param collator the collator for string comparison
+     * @return {@code BooleanValue.FALSE} as soon as an equal pair is found
+     */
+    private Sequence allDifferent(final java.util.List<AtomicValue> items, final Collator collator) throws XPathException {
+        // all-different iff count(distinct-values) == count
+        for (int i = 0; i < items.size(); i++) {
+            for (int j = i + 1; j < items.size(); j++) {
+                if (contextuallyEqual(items.get(i), items.get(j), collator)) {
+                    return BooleanValue.FALSE;
+                }
+            }
+        }
+        return BooleanValue.TRUE;
+    }
+
+    /**
+     * XQ4 contextual equality: two values are contextually equal if fn:compare returns 0.
+     * NaN is treated as equal to NaN. Errors in comparison mean values are unequal.
+     *
+     * @param v1 the first value
+     * @param v2 the second value
+     * @param collator the collator for string comparison
+     * @return true if the values are contextually equal
+     */
+    static boolean contextuallyEqual(final AtomicValue v1, final AtomicValue v2, final Collator collator) {
+        try {
+            // NaN handling: NaN equals NaN, and nothing else equals NaN
+            if (v1 instanceof NumericValue n1 && v2 instanceof NumericValue n2) {
+                final boolean nan1 = n1.isNaN();
+                final boolean nan2 = n2.isNaN();
+                if (nan1 || nan2) {
+                    return nan1 && nan2;
+                }
+            }
+            return FunCompare.compare(v1, v2, collator) == 0;
+        } catch (final Exception ignored) {
+            // Deliberately swallowed: errors in comparison mean values are unequal
+            return false;
+        }
+    }
+
+    /**
+     * Resolve the collator: the one named by the second argument if present
+     * and non-empty, the default collator of the context otherwise.
+     *
+     * @param args the function arguments
+     * @return the collator to compare with
+     * @throws XPathException if the collation URI cannot be resolved
+     */
+    private Collator getCollator(final Sequence[] args) throws XPathException {
+        if (args.length > 1 && !args[1].isEmpty()) {
+            final String collationURI = args[1].getStringValue();
+            return context.getCollator(collationURI, ErrorCodes.FOCH0002);
+        }
+        return context.getDefaultCollator();
+    }
+
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAtomicEqual.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAtomicEqual.java
new file mode 100644
index 00000000000..0836fb00e66
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnAtomicEqual.java
@@ -0,0 +1,214 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.functions.fn;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.Function;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.AbstractDateTimeValue;
+import org.exist.xquery.value.AtomicValue;
+import org.exist.xquery.value.BinaryValue;
+import org.exist.xquery.value.BooleanValue;
+import org.exist.xquery.value.DoubleValue;
+import org.exist.xquery.value.FloatValue;
+import org.exist.xquery.value.FunctionParameterSequenceType;
+import org.exist.xquery.value.FunctionReturnSequenceType;
+import org.exist.xquery.value.NumericValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+/**
+ * Implements fn:atomic-equal (XQuery 4.0).
+ *
+ * Compares two atomic values for equality. Unlike eq, this function:
+ * - Never raises a dynamic error (returns false for incomparable types)
+ * - NaN equals NaN
+ * - Does not depend on static or dynamic context
+ */
+public class FnAtomicEqual extends BasicFunction {
+
+    public static final FunctionSignature FN_ATOMIC_EQUAL = new FunctionSignature(
+            new QName("atomic-equal", Function.BUILTIN_FUNCTION_NS),
+            "Compares two atomic values for equality. NaN equals NaN, and incomparable types return false.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("value1", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The first value"),
+                new FunctionParameterSequenceType("value2", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The second value")
+            },
+            new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if the values are equal"));
+
+    public FnAtomicEqual(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Compare the two atomized arguments. The checks run in a fixed order:
+     * NaN, infinity, string-like types, numerics, binary types, booleans,
+     * date/time timezone presence, unrelated types, and finally a plain
+     * value comparison.
+     *
+     * @param args the two values to compare
+     * @param contextSequence not used
+     * @return {@code BooleanValue.TRUE} if the values are equal, {@code BooleanValue.FALSE} otherwise
+     * @throws XPathException if an argument cannot be atomized
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final AtomicValue v1 = args[0].itemAt(0).atomize();
+        final AtomicValue v2 = args[1].itemAt(0).atomize();
+
+        // Handle NaN: NaN equals NaN (across float/double) and nothing else
+        final boolean nan1 = isNaN(v1);
+        final boolean nan2 = isNaN(v2);
+        if (nan1 || nan2) {
+            return BooleanValue.valueOf(nan1 && nan2);
+        }
+
+        // Handle Infinity: float INF equals double INF (and -INF)
+        if (isInfinite(v1) && isInfinite(v2)) {
+            return BooleanValue.valueOf(toDouble(v1) == toDouble(v2));
+        }
+
+        try {
+            final int t1 = v1.getType();
+            final int t2 = v2.getType();
+
+            // String-like types: string, untypedAtomic, anyURI all compare equal
+            if (isStringLike(t1) && isStringLike(t2)) {
+                return BooleanValue.valueOf(v1.getStringValue().equals(v2.getStringValue()));
+            }
+
+            // Numeric: compare by mathematical value regardless of type
+            // Per XQ4 spec: "Two numeric values are equal if their mathematical values are equal"
+            if (v1 instanceof NumericValue n1 && v2 instanceof NumericValue n2) {
+                return BooleanValue.valueOf(numericEqual(n1, n2));
+            }
+
+            // Binary types: hexBinary and base64Binary compare equal by content
+            if (isBinaryType(t1) && isBinaryType(t2)) {
+                if (v1 instanceof BinaryValue && v2 instanceof BinaryValue) {
+                    return BooleanValue.valueOf(v1.compareTo(null, v2) == 0);
+                }
+                return BooleanValue.FALSE;
+            }
+
+            // Boolean
+            if (t1 == Type.BOOLEAN && t2 == Type.BOOLEAN) {
+                return BooleanValue.valueOf(v1.effectiveBooleanValue() == v2.effectiveBooleanValue());
+            }
+
+            // Date/time: values with timezone never equal values without timezone
+            if (v1 instanceof AbstractDateTimeValue dt1 && v2 instanceof AbstractDateTimeValue dt2
+                    && dt1.hasTimezone() != dt2.hasTimezone()) {
+                return BooleanValue.FALSE;
+            }
+
+            // Different base types are never equal
+            if (t1 != t2 && !Type.subTypeOf(t1, t2) && !Type.subTypeOf(t2, t1)) {
+                return BooleanValue.FALSE;
+            }
+
+            // Same type — compare by value
+            return BooleanValue.valueOf(v1.compareTo(null, v2) == 0);
+        } catch (final XPathException | RuntimeException e) {
+            // Incomparable types or indeterminate ordering — return false per spec.
+            // This also covers the NumberFormatException that BigDecimal raises when
+            // numericEqual is given an infinite value together with an exact one.
+            return BooleanValue.FALSE;
+        }
+    }
+
+    /** Returns true if the value is a double or float NaN. */
+    private static boolean isNaN(final AtomicValue v) {
+        if (v instanceof DoubleValue d) {
+            return Double.isNaN(d.getDouble());
+        }
+        if (v instanceof FloatValue f) {
+            return Float.isNaN(f.getValue());
+        }
+        return false;
+    }
+
+    /** Returns true if the value is a double or float positive or negative infinity. */
+    private static boolean isInfinite(final AtomicValue v) {
+        if (v instanceof DoubleValue d) {
+            return Double.isInfinite(d.getDouble());
+        }
+        if (v instanceof FloatValue f) {
+            return Float.isInfinite(f.getValue());
+        }
+        return false;
+    }
+
+    /** Returns the value of a double or float as a double, and 0 for any other value. */
+    private static double toDouble(final AtomicValue v) {
+        if (v instanceof DoubleValue d) {
+            return d.getDouble();
+        }
+        if (v instanceof FloatValue f) {
+            return f.getValue();
+        }
+        return 0;
+    }
+
+    /**
+     * Compare two numeric values by mathematical value.
+     *
+     * @param v1 the first value
+     * @param v2 the second value
+     * @return true if both values denote the same number
+     * @throws XPathException if a value cannot be converted for the comparison
+     * @throws NumberFormatException if exactly one side is floating-point and that side is infinite or NaN
+     */
+    static boolean numericEqual(final NumericValue v1, final NumericValue v2) throws XPathException {
+        // Both floating-point: use double comparison (handles 0.0 == -0.0)
+        final boolean floating1 = v1 instanceof DoubleValue || v1 instanceof FloatValue;
+        final boolean floating2 = v2 instanceof DoubleValue || v2 instanceof FloatValue;
+        if (floating1 && floating2) {
+            return v1.getDouble() == v2.getDouble();
+        }
+        // Mixed floating-point and exact: convert to BigDecimal for exact mathematical comparison
+        // This handles cases like atomic-equal(16777218, xs:double("16777218"))
+        return numericToBigDecimal(v1).compareTo(numericToBigDecimal(v2)) == 0;
+    }
+
+    /**
+     * Convert a numeric value to a BigDecimal without rounding.
+     *
+     * @param v the value
+     * @return the exact decimal expansion of the value
+     * @throws XPathException if the string value of an exact numeric cannot be obtained
+     */
+    private static java.math.BigDecimal numericToBigDecimal(final NumericValue v) throws XPathException {
+        if (v instanceof DoubleValue d) {
+            // Use new BigDecimal(double) for exact binary representation,
+            // not valueOf() which rounds via Double.toString()
+            return new java.math.BigDecimal(d.getDouble());
+        }
+        if (v instanceof FloatValue f) {
+            return new java.math.BigDecimal(f.getValue());
+        }
+        // Integer and decimal types: parse from string for exact representation
+        return new java.math.BigDecimal(v.getStringValue());
+    }
+
+    /** Returns true for xs:string and its subtypes, xs:untypedAtomic, and xs:anyURI and its subtypes. */
+    private static boolean isStringLike(final int type) {
+        return Type.subTypeOf(type, Type.STRING)
+                || type == Type.UNTYPED_ATOMIC
+                || Type.subTypeOf(type, Type.ANY_URI);
+    }
+
+    /** Returns true for xs:hexBinary and xs:base64Binary. */
+    private static boolean isBinaryType(final int type) {
+        return type == Type.HEX_BINARY || type == Type.BASE64_BINARY;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnBuildUri.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnBuildUri.java
new file mode 100644
index 00000000000..bcd257a3869
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnBuildUri.java
@@ -0,0 +1,335 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+
* This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +/** + * Implements fn:build-uri (XQuery 4.0). + * + * Constructs a URI from the parts provided in a map. 
+ */
+public class FnBuildUri extends BasicFunction {
+
+    /** Schemes that are treated as non-hierarchical when the parts map has no "hierarchical" entry. */
+    private static final Set<String> NON_HIERARCHICAL_SCHEMES = new HashSet<>(Arrays.asList(
+            "mailto", "news", "urn", "tel", "tag", "jar", "data", "javascript", "cid", "mid"
+    ));
+
+    /** Upper-case hex digits for percent-encoding. */
+    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();
+
+    public static final FunctionSignature[] FN_BUILD_URI = {
+        new FunctionSignature(
+            new QName("build-uri", Function.BUILTIN_FUNCTION_NS),
+            "Constructs a URI from the parts provided.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("parts", Type.MAP_ITEM,
+                    Cardinality.EXACTLY_ONE, "Map of URI components")
+            },
+            new FunctionReturnSequenceType(Type.STRING,
+                Cardinality.EXACTLY_ONE, "The constructed URI")),
+        new FunctionSignature(
+            new QName("build-uri", Function.BUILTIN_FUNCTION_NS),
+            "Constructs a URI from the parts provided.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("parts", Type.MAP_ITEM,
+                    Cardinality.EXACTLY_ONE, "Map of URI components"),
+                new FunctionParameterSequenceType("options", Type.MAP_ITEM,
+                    Cardinality.ZERO_OR_ONE, "Options map")
+            },
+            new FunctionReturnSequenceType(Type.STRING,
+                Cardinality.EXACTLY_ONE, "The constructed URI"))
+    };
+
+    public FnBuildUri(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Assemble the URI from the parts map in the order scheme, authority,
+     * path, query, fragment.
+     *
+     * @param args the parts map and, optionally, the options map
+     * @param contextSequence not used
+     * @return the constructed URI as a string value
+     * @throws XPathException if a map entry cannot be read
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final MapType parts = (MapType) args[0].itemAt(0);
+
+        // Parse options
+        boolean allowDeprecated = false;
+        boolean omitDefaultPorts = false;
+        boolean uncPath = false;
+        if (args.length > 1 && !args[1].isEmpty()) {
+            final MapType options = (MapType) args[1].itemAt(0);
+            allowDeprecated = getBooleanOption(options, "allow-deprecated-features", false);
+            omitDefaultPorts = getBooleanOption(options, "omit-default-ports", false);
+            uncPath = getBooleanOption(options, "unc-path", false);
+        }
+
+        final StringBuilder uri = new StringBuilder();
+
+        // Get scheme; lower-case it with Locale.ROOT so that lookups do not depend
+        // on the default locale (a Turkish locale maps 'I' to a dotless 'ı')
+        final String scheme = getStringValue(parts, "scheme");
+        final String schemeLower = scheme == null ? null : scheme.toLowerCase(java.util.Locale.ROOT);
+
+        // Determine if hierarchical: an explicit entry wins over the scheme default
+        boolean hierarchical = true;
+        final Sequence hierSeq = parts.get(new StringValue(this, "hierarchical"));
+        if (hierSeq != null && !hierSeq.isEmpty()) {
+            hierarchical = hierSeq.effectiveBooleanValue();
+        } else if (scheme != null) {
+            hierarchical = !NON_HIERARCHICAL_SCHEMES.contains(schemeLower);
+        }
+
+        // Add scheme
+        if (scheme != null) {
+            uri.append(scheme);
+            if (!hierarchical) {
+                uri.append(':');
+            } else if ("file".equalsIgnoreCase(scheme) && uncPath) {
+                uri.append(":////");
+            } else {
+                uri.append("://");
+            }
+        }
+
+        // Build authority from components or use authority directly
+        final String userinfo = getStringValue(parts, "userinfo");
+        final String host = getStringValue(parts, "host");
+        final Sequence portSeq = parts.get(new StringValue(this, "port"));
+        Integer port = null;
+        if (portSeq != null && !portSeq.isEmpty()) {
+            port = ((Number) portSeq.itemAt(0).toJavaObject(Long.class)).intValue();
+        }
+
+        // Handle deprecated password in userinfo: a non-empty password drops the
+        // whole userinfo unless deprecated features are allowed
+        String effectiveUserinfo = userinfo;
+        if (!allowDeprecated && effectiveUserinfo != null && effectiveUserinfo.contains(":")) {
+            final String password = effectiveUserinfo.substring(effectiveUserinfo.indexOf(':') + 1);
+            if (!password.isEmpty()) {
+                effectiveUserinfo = null;
+            }
+        }
+
+        // Omit default ports
+        if (omitDefaultPorts && port != null && scheme != null && isDefaultPort(schemeLower, port)) {
+            port = null;
+        }
+
+        if (effectiveUserinfo != null || host != null || port != null) {
+            // with a scheme the "//" has already been written
+            if (scheme == null) {
+                uri.append("//");
+            }
+            if (effectiveUserinfo != null) {
+                uri.append(effectiveUserinfo).append('@');
+            }
+            if (host != null) {
+                uri.append(host);
+            }
+            if (port != null) {
+                uri.append(':').append(port);
+            }
+        } else {
+            final String authority = getStringValue(parts, "authority");
+            if (authority != null) {
+                if (scheme == null) {
+                    uri.append("//");
+                }
+                uri.append(authority);
+            }
+        }
+
+        // Build path from path-segments or use path directly
+        final Sequence pathSegments = parts.get(new StringValue(this, "path-segments"));
+        if (pathSegments != null && !pathSegments.isEmpty()) {
+            final StringBuilder pathBuilder = new StringBuilder();
+            boolean first = true;
+            for (final SequenceIterator i = pathSegments.iterate(); i.hasNext(); ) {
+                if (!first) {
+                    pathBuilder.append('/');
+                }
+                first = false;
+                final String segment = i.nextItem().getStringValue();
+                // segments of non-hierarchical URIs are written as they are
+                pathBuilder.append(hierarchical ? encodePathSegment(segment) : segment);
+            }
+            uri.append(pathBuilder);
+        } else {
+            final String path = getStringValue(parts, "path");
+            if (path != null) {
+                uri.append(path);
+            }
+        }
+
+        // Build query from query-parameters or use query directly
+        final Sequence queryParamsSeq = parts.get(new StringValue(this, "query-parameters"));
+        if (queryParamsSeq != null && !queryParamsSeq.isEmpty() && queryParamsSeq.itemAt(0) instanceof MapType) {
+            final MapType queryParams = (MapType) queryParamsSeq.itemAt(0);
+            final StringBuilder queryBuilder = new StringBuilder();
+            boolean first = true;
+            for (final SequenceIterator ki = queryParams.keys().iterate(); ki.hasNext(); ) {
+                final StringValue key = (StringValue) ki.nextItem();
+                final String keyStr = key.getStringValue();
+                final Sequence values = queryParams.get(key);
+                // one "key=value" pair per value; an empty key yields the bare value
+                for (final SequenceIterator vi = values.iterate(); vi.hasNext(); ) {
+                    if (!first) {
+                        queryBuilder.append('&');
+                    }
+                    first = false;
+                    final String valStr = vi.nextItem().getStringValue();
+                    if (!keyStr.isEmpty()) {
+                        queryBuilder.append(encodeQueryComponent(keyStr)).append('=');
+                    }
+                    queryBuilder.append(encodeQueryComponent(valStr));
+                }
+            }
+            if (queryBuilder.length() > 0) {
+                uri.append('?').append(queryBuilder);
+            }
+        } else {
+            final String query = getStringValue(parts, "query");
+            if (query != null) {
+                uri.append('?').append(query);
+            }
+        }
+
+        // Fragment
+        final String fragment = getStringValue(parts, "fragment");
+        if (fragment != null) {
+            uri.append('#').append(encodeFragment(fragment));
+        }
+
+        return new StringValue(this, uri.toString());
+    }
+
+    /**
+     * Look up a map entry as a string.
+     *
+     * @param map the map to read
+     * @param key the entry name
+     * @return the string value of the entry, or null if the entry is absent or empty
+     * @throws XPathException if the string value cannot be obtained
+     */
+    private String getStringValue(final MapType map, final String key) throws XPathException {
+        final Sequence val = map.get(new StringValue(this, key));
+        if (val != null && !val.isEmpty()) {
+            return val.getStringValue();
+        }
+        return null;
+    }
+
+    /**
+     * Look up a map entry as a boolean.
+     *
+     * @param options the map to read
+     * @param key the entry name
+     * @param defaultValue the value to use if the entry is absent or empty
+     * @return the effective boolean value of the entry, or {@code defaultValue}
+     * @throws XPathException if the effective boolean value cannot be computed
+     */
+    private boolean getBooleanOption(final MapType options, final String key,
+            final boolean defaultValue) throws XPathException {
+        final Sequence val = options.get(new StringValue(this, key));
+        if (val != null && !val.isEmpty()) {
+            return val.effectiveBooleanValue();
+        }
+        return defaultValue;
+    }
+
+    /**
+     * Check whether a port is the default port of a scheme.
+     *
+     * @param scheme the scheme; only the lower-case names below are recognised
+     * @param port the port number
+     * @return true if {@code port} is the default for http, https, ftp or ssh
+     */
+    private static boolean isDefaultPort(final String scheme, final int port) {
+        return switch (scheme) {
+            case "http" -> port == 80;
+            case "https" -> port == 443;
+            case "ftp" -> port == 21;
+            case "ssh" -> port == 22;
+            default -> false;
+        };
+    }
+
+    // Encode path segment: control chars + space % / ? # + [ ]
+    private static String encodePathSegment(final String s) {
+        return percentEncode(s, " %/?#+[]");
+    }
+
+    // Encode query component: control chars + space % = & # + [ ]
+    private static String encodeQueryComponent(final String s) {
+        return percentEncode(s, " %=&#+[]");
+    }
+
+    // Encode fragment: control chars + space % # + [ ]
+    private static String encodeFragment(final String s) {
+        return percentEncode(s, " %#+[]");
+    }
+
+    /**
+     * Percent-encode the control characters (below U+0020) and the given
+     * reserved characters of a string; every other character is copied.
+     *
+     * @param s the string to encode, may be null
+     * @param reserved the characters to encode in addition to control characters
+     * @return the encoded string, or {@code s} itself if it is null or empty
+     */
+    private static String percentEncode(final String s, final String reserved) {
+        if (s == null || s.isEmpty()) {
+            return s;
+        }
+        final StringBuilder sb = new StringBuilder(s.length());
+        for (int i = 0; i < s.length(); i++) {
+            final char c = s.charAt(i);
+            if (c < 0x20 || reserved.indexOf(c) >= 0) {
+                appendPercentEncoded(sb, c);
+            } else {
+                sb.append(c);
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Append the UTF-8 bytes of a character as upper-case %XX triplets.
+     *
+     * @param sb the builder to append to
+     * @param c the character to encode
+     */
+    private static void appendPercentEncoded(final StringBuilder sb, final char c) {
+        final byte[] bytes = String.valueOf(c).getBytes(java.nio.charset.StandardCharsets.UTF_8);
+        for (final byte b : bytes) {
+            sb.append('%')
+              .append(HEX_DIGITS[(b >> 4) & 0x0F])
+              .append(HEX_DIGITS[b & 0x0F]);
+        }
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnChar.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnChar.java
new file mode 100644
index 00000000000..fb97fd1a0dc
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnChar.java
@@ -0,0 +1,218 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ *
http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +/** + * Implements fn:char (XQuery 4.0). + * + * Returns a string containing a single character identified by its codepoint + * or by an HTML5 character reference name. 
+ */ +public class FnChar extends BasicFunction { + + private static final ErrorCodes.ErrorCode FOCH0005 = new ErrorCodes.ErrorCode( + "FOCH0005", "Unknown character name"); + + public static final FunctionSignature FN_CHAR = new FunctionSignature( + new QName("char", Function.BUILTIN_FUNCTION_NS), + "Returns a string containing a single character identified by codepoint or character name.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, + "A codepoint (integer) or character name (string)") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the character")); + + private static volatile Map htmlEntities; + + public FnChar(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final var item = args[0].itemAt(0); + final int type = item.getType(); + + if (Type.subTypeOf(type, Type.INTEGER)) { + // Codepoint + final long codepoint = ((IntegerValue) item).getLong(); + if (codepoint < 1 || codepoint > 0x10FFFF) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Codepoint " + codepoint + " is not in the valid range 1 to 1114111"); + } + // Check for XML-illegal characters (surrogates, etc.) 
+ if (!isXmlChar((int) codepoint)) { + throw new XPathException(this, FOCH0005, + "Codepoint " + codepoint + " is not a valid XML character"); + } + return new StringValue(this, new String(Character.toChars((int) codepoint))); + } else if (Type.subTypeOf(type, Type.DOUBLE) || Type.subTypeOf(type, Type.FLOAT) + || Type.subTypeOf(type, Type.DECIMAL)) { + // Numeric but not integer — try to convert + final NumericValue num = (NumericValue) item; + if (num.hasFractionalPart()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Codepoint must be an integer, got " + Type.getTypeName(type)); + } + final long codepoint = num.getLong(); + if (codepoint < 1 || codepoint > 0x10FFFF) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Codepoint " + codepoint + " is not in the valid range 1 to 1114111"); + } + if (!isXmlChar((int) codepoint)) { + throw new XPathException(this, FOCH0005, + "Codepoint " + codepoint + " is not a valid XML character"); + } + return new StringValue(this, new String(Character.toChars((int) codepoint))); + } else { + // Character name lookup + final String name = item.getStringValue(); + + // Handle backslash escapes + switch (name) { + case "\\n": return new StringValue(this, "\n"); + case "\\r": return new StringValue(this, "\r"); + case "\\t": return new StringValue(this, "\t"); + } + + // Try HTML5 named character reference first (case-sensitive per spec) + final Map entities = getHtmlEntities(); + String resolved = entities.get(name); + if (resolved != null) { + return new StringValue(this, resolved); + } + + // Try Unicode character name + try { + final int cp = Character.codePointOf(name.replace(" ", "_").replace("-", "_").toUpperCase()); + if (isXmlChar(cp)) { + return new StringValue(this, new String(Character.toChars(cp))); + } + } catch (final IllegalArgumentException e) { + // Not a Unicode name either + } + + throw new XPathException(this, FOCH0005, + "Unknown character name: " + name); + } + } + + private static boolean 
isXmlChar(final int cp) { + return cp == 0x9 || cp == 0xA || cp == 0xD + || (cp >= 0x20 && cp <= 0xD7FF) + || (cp >= 0xE000 && cp <= 0xFFFD) + || (cp >= 0x10000 && cp <= 0x10FFFF); + } + + private static Map getHtmlEntities() { + if (htmlEntities == null) { + synchronized (FnChar.class) { + if (htmlEntities == null) { + htmlEntities = loadHtmlEntities(); + } + } + } + return htmlEntities; + } + + private static Map loadHtmlEntities() { + final Map map = new HashMap<>(2500); + + // Load from bundled resource file + final InputStream is = FnChar.class.getResourceAsStream("html5-entities.properties"); + if (is != null) { + try (final BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + line = line.trim(); + if (line.isEmpty() || line.startsWith("#")) { + continue; + } + final int eq = line.indexOf('='); + if (eq > 0) { + final String entityName = line.substring(0, eq); + final String codepoints = line.substring(eq + 1); + map.put(entityName, decodeCodepoints(codepoints)); + } + } + } catch (final IOException e) { + // Fall through with partial map + } + } + + // Add a few critical aliases if the file wasn't found + if (map.isEmpty()) { + addCommonEntities(map); + } + + return map; + } + + private static String decodeCodepoints(final String spec) { + // Format: "U+XXXX" or "U+XXXX,U+YYYY" + final StringBuilder sb = new StringBuilder(); + for (final String part : spec.split(",")) { + final String trimmed = part.trim(); + if (trimmed.startsWith("U+") || trimmed.startsWith("u+")) { + final int cp = Integer.parseInt(trimmed.substring(2), 16); + sb.appendCodePoint(cp); + } + } + return sb.toString(); + } + + private static void addCommonEntities(final Map map) { + map.put("amp", "&"); + map.put("lt", "<"); + map.put("gt", ">"); + map.put("quot", "\""); + map.put("apos", "'"); + map.put("nbsp", "\u00A0"); + map.put("tab", "\t"); + map.put("newline", "\n"); + } +} diff 
--git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCharacters.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCharacters.java new file mode 100644 index 00000000000..a45f63ca623 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCharacters.java @@ -0,0 +1,77 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:characters (XQuery 4.0). + * + * Splits the supplied string into a sequence of single-character strings. 
+ */ +public class FnCharacters extends BasicFunction { + + public static final FunctionSignature FN_CHARACTERS = new FunctionSignature( + new QName("characters", Function.BUILTIN_FUNCTION_NS), + "Splits the supplied string into a sequence of single-character strings.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The string to split") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, "a sequence of single-character strings")); + + public FnCharacters(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final String str = args[0].getStringValue(); + if (str.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final ValueSequence result = new ValueSequence(str.length()); + // Use codepoint iteration to handle surrogate pairs correctly + int i = 0; + while (i < str.length()) { + final int codepoint = str.codePointAt(i); + result.add(new StringValue(this, new String(Character.toChars(codepoint)))); + i += Character.charCount(codepoint); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCivilTimezone.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCivilTimezone.java new file mode 100644 index 00000000000..bace87f5755 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCivilTimezone.java @@ -0,0 +1,152 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the 
License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AbstractDateTimeValue; +import org.exist.xquery.value.DayTimeDurationValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +import javax.xml.datatype.XMLGregorianCalendar; +import javax.xml.datatype.DatatypeConstants; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.zone.ZoneRulesException; + +/** + * Implements XQuery 4.0 fn:civil-timezone. + * + * fn:civil-timezone($value as xs:dateTime, $place as xs:string?) as xs:dayTimeDuration + * + * Returns the civil timezone offset for a given dateTime at a given IANA timezone location, + * accounting for daylight savings time transitions. 
+ */ +public class FnCivilTimezone extends BasicFunction { + + private static final ErrorCodes.ErrorCode FODT0004 = new ErrorCodes.ErrorCode("FODT0004", + "No timezone data available"); + + public static final FunctionSignature[] FN_CIVIL_TIMEZONE = { + new FunctionSignature( + new QName("civil-timezone", Function.BUILTIN_FUNCTION_NS), + "Returns the civil timezone offset for a dateTime at a place.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DATE_TIME, Cardinality.EXACTLY_ONE, "The dateTime to look up"), + new FunctionParameterSequenceType("place", Type.STRING, Cardinality.ZERO_OR_ONE, "IANA timezone name (e.g. 'America/New_York')") + }, + new FunctionReturnSequenceType(Type.DAY_TIME_DURATION, Cardinality.EXACTLY_ONE, "the civil timezone offset")), + new FunctionSignature( + new QName("civil-timezone", Function.BUILTIN_FUNCTION_NS), + "Returns the civil timezone offset for a dateTime using the default place.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DATE_TIME, Cardinality.EXACTLY_ONE, "The dateTime to look up") + }, + new FunctionReturnSequenceType(Type.DAY_TIME_DURATION, Cardinality.EXACTLY_ONE, "the civil timezone offset")) + }; + + public FnCivilTimezone(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final AbstractDateTimeValue dtv = (AbstractDateTimeValue) args[0].itemAt(0); + final XMLGregorianCalendar cal = (XMLGregorianCalendar) dtv.calendar.clone(); + + // Determine the IANA zone + final ZoneId zone; + if (args.length > 1 && !args[1].isEmpty()) { + final String place = args[1].getStringValue(); + try { + zone = ZoneId.of(place); + } catch (final java.time.DateTimeException e) { + throw new XPathException(this, FODT0004, + "Unknown timezone: " + place); + } + } else { + // Use system default timezone as the "default place" + 
zone = ZoneId.systemDefault(); + } + + // Convert the dateTime to a LocalDateTime (ignoring any timezone on the value) + final int year = cal.getYear(); + final int month = cal.getMonth(); + final int day = cal.getDay(); + final int hour = cal.getHour() == DatatypeConstants.FIELD_UNDEFINED ? 0 : cal.getHour(); + final int minute = cal.getMinute() == DatatypeConstants.FIELD_UNDEFINED ? 0 : cal.getMinute(); + final int second = cal.getSecond() == DatatypeConstants.FIELD_UNDEFINED ? 0 : cal.getSecond(); + + final LocalDateTime ldt = LocalDateTime.of(year, month, day, hour, minute, second); + + // Get the offset at that local date-time in the given zone + final ZonedDateTime zdt = ldt.atZone(zone); + final ZoneOffset offset = zdt.getOffset(); + final int totalSeconds = offset.getTotalSeconds(); + + // Convert to xs:dayTimeDuration + final String dur = secondsToDayTimeDuration(totalSeconds); + return new DayTimeDurationValue(this, dur); + } + + private static String secondsToDayTimeDuration(final int totalSeconds) { + final boolean negative = totalSeconds < 0; + int abs = Math.abs(totalSeconds); + final int hours = abs / 3600; + abs %= 3600; + final int minutes = abs / 60; + final int seconds = abs % 60; + + final StringBuilder sb = new StringBuilder(); + if (negative) { + sb.append('-'); + } + sb.append("PT"); + if (hours > 0) { + sb.append(hours).append('H'); + } + if (minutes > 0) { + sb.append(minutes).append('M'); + } + if (seconds > 0) { + sb.append(seconds).append('S'); + } + // If all zero, output PT0S + if (hours == 0 && minutes == 0 && seconds == 0) { + sb.append("0S"); + } + return sb.toString(); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCollation.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCollation.java new file mode 100644 index 00000000000..fd8fed95284 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnCollation.java @@ -0,0 +1,94 @@ +/* + * eXist-db Open Source Native XML 
Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +/** + * fn:collation() — Returns the default collation URI. + * fn:collation-available($uri) — Returns true if the collation is supported. 
+ */ +public class FnCollation extends BasicFunction { + + public static final FunctionSignature[] FN_COLLATION = { + new FunctionSignature( + new QName("collation", Function.BUILTIN_FUNCTION_NS), + "Returns the URI of the default collation.", + null, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, + "The default collation URI")), + new FunctionSignature( + new QName("collation", Function.BUILTIN_FUNCTION_NS), + "Returns the collation URI if supported, empty sequence otherwise.", + new SequenceType[] { + new FunctionParameterSequenceType("uri", Type.STRING, + Cardinality.EXACTLY_ONE, "The collation URI to check") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_ONE, + "The collation URI if supported")) + }; + + public static final FunctionSignature FN_COLLATION_AVAILABLE = new FunctionSignature( + new QName("collation-available", Function.BUILTIN_FUNCTION_NS), + "Returns true if the specified collation is supported.", + new SequenceType[] { + new FunctionParameterSequenceType("uri", Type.STRING, + Cardinality.EXACTLY_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, + "true if the collation is supported")); + + public FnCollation(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("collation")) { + if (getArgumentCount() == 1) { + // 1-arg: check if the named collation is supported + final String uri = args[0].getStringValue(); + try { + context.getCollator(uri); + return new StringValue(this, uri); + } catch (final XPathException e) { + return Sequence.EMPTY_SEQUENCE; + } + } + // 0-arg: return default collation + final String defaultCollation = context.getDefaultCollation(); + return new StringValue(this, defaultCollation != null ? 
defaultCollation + : org.exist.util.Collations.UNICODE_CODEPOINT_COLLATION_URI); + } else { + // collation-available + final String uri = args[0].getStringValue(); + try { + context.getCollator(uri); + return BooleanValue.TRUE; + } catch (final XPathException e) { + return BooleanValue.FALSE; + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDateTimeParts.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDateTimeParts.java new file mode 100644 index 00000000000..b9445ce1fd0 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDateTimeParts.java @@ -0,0 +1,176 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; + +import javax.xml.datatype.DatatypeConstants; +import java.math.BigDecimal; + +/** + * fn:build-dateTime($date, $time) — Combine xs:date + xs:time into xs:dateTime. + * fn:parts-of-dateTime($dateTime) — Decompose xs:dateTime into a map of components. 
+ * + * The map returned by parts-of-dateTime has keys: year, month, day, hour, minute, + * seconds (as xs:decimal including fractional), timezone (as xs:dayTimeDuration). + * When the Parser branch merges, these maps will be compatible with record type checking. + */ +public class FnDateTimeParts extends BasicFunction { + + public static final FunctionSignature FN_BUILD_DATETIME = new FunctionSignature( + new QName("build-dateTime", Function.BUILTIN_FUNCTION_NS), + "Combines an xs:date and an xs:time into an xs:dateTime.", + new SequenceType[] { + new FunctionParameterSequenceType("date", Type.DATE, + Cardinality.EXACTLY_ONE, "The date component"), + new FunctionParameterSequenceType("time", Type.TIME, + Cardinality.EXACTLY_ONE, "The time component") + }, + new FunctionReturnSequenceType(Type.DATE_TIME, Cardinality.EXACTLY_ONE, + "The combined xs:dateTime")); + + public static final FunctionSignature FN_PARTS_OF_DATETIME = new FunctionSignature( + new QName("parts-of-dateTime", Function.BUILTIN_FUNCTION_NS), + "Decomposes an xs:dateTime into a map of its components.", + new SequenceType[] { + new FunctionParameterSequenceType("dateTime", Type.DATE_TIME, + Cardinality.ZERO_OR_ONE, "The dateTime to decompose") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, + "A map with keys: year, month, day, hour, minute, seconds, timezone")); + + public FnDateTimeParts(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("build-dateTime")) { + return buildDateTime(args); + } else { + return partsOfDateTime(args); + } + } + + private Sequence buildDateTime(final Sequence[] args) throws XPathException { + final DateValue date = (DateValue) args[0].itemAt(0); + final TimeValue time = (TimeValue) args[1].itemAt(0); + + final int year = date.getPart(AbstractDateTimeValue.YEAR); 
+ final int month = date.getPart(AbstractDateTimeValue.MONTH); + final int day = date.getPart(AbstractDateTimeValue.DAY); + final int hour = time.getPart(AbstractDateTimeValue.HOUR); + final int minute = time.getPart(AbstractDateTimeValue.MINUTE); + final int second = time.getPart(AbstractDateTimeValue.SECOND); + final int millis = time.getPart(AbstractDateTimeValue.MILLISECOND); + + // Timezone: both must agree or one must be absent + final Sequence dateTz = date.getTimezone(); + final Sequence timeTz = time.getTimezone(); + + String tzSuffix = ""; + if (!dateTz.isEmpty() && !timeTz.isEmpty()) { + // Both have timezones — they must be equal + final String dateTzStr = dateTz.getStringValue(); + final String timeTzStr = timeTz.getStringValue(); + if (!dateTzStr.equals(timeTzStr)) { + throw new XPathException(this, ErrorCodes.FORG0008, + "Date and time timezone offsets do not match"); + } + tzSuffix = formatTimezoneOffset(date); + } else if (!dateTz.isEmpty()) { + tzSuffix = formatTimezoneOffset(date); + } else if (!timeTz.isEmpty()) { + tzSuffix = formatTimezoneOffset(time); + } + + // Build the lexical representation + final String fracSeconds = millis > 0 ? "." 
+ String.format("%03d", millis) : ""; + final String lexical = String.format("%04d-%02d-%02dT%02d:%02d:%02d%s%s", + year, month, day, hour, minute, second, fracSeconds, tzSuffix); + + return new DateTimeValue(this, lexical); + } + + private String formatTimezoneOffset(final AbstractDateTimeValue dt) throws XPathException { + final Sequence tz = dt.getTimezone(); + if (tz.isEmpty()) { + return ""; + } + final DayTimeDurationValue dtv = (DayTimeDurationValue) tz; + final int totalMinutes = (int) (dtv.getValueInMilliseconds() / 60000L); + if (totalMinutes == 0) { + return "Z"; + } + final int hours = totalMinutes / 60; + final int mins = Math.abs(totalMinutes % 60); + return String.format("%+03d:%02d", hours, mins); + } + + private Sequence partsOfDateTime(final Sequence[] args) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final DateTimeValue dt = (DateTimeValue) args[0].itemAt(0); + final MapType result = new MapType(this, context); + + // year as xs:integer + result.add(new StringValue("year"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.YEAR))); + + // month as xs:integer + result.add(new StringValue("month"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.MONTH))); + + // day as xs:integer + result.add(new StringValue("day"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.DAY))); + + // hour as xs:integer + result.add(new StringValue("hour"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.HOUR))); + + // minute as xs:integer + result.add(new StringValue("minute"), + new IntegerValue(this, dt.getPart(AbstractDateTimeValue.MINUTE))); + + // seconds as xs:decimal (including fractional part) + final int sec = dt.getPart(AbstractDateTimeValue.SECOND); + final int millis = dt.getPart(AbstractDateTimeValue.MILLISECOND); + final BigDecimal seconds = BigDecimal.valueOf(sec) + .add(BigDecimal.valueOf(millis, 3)); + result.add(new StringValue("seconds"), + new DecimalValue(this, 
seconds)); + + // timezone as xs:dayTimeDuration (or absent) + final Sequence tz = dt.getTimezone(); + if (!tz.isEmpty()) { + result.add(new StringValue("timezone"), tz); + } + + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDecodeFromUri.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDecodeFromUri.java new file mode 100644 index 00000000000..b94f7bc58c1 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDecodeFromUri.java @@ -0,0 +1,183 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; + +/** + * Implements XQuery 4.0 fn:decode-from-uri. + * + * Decodes a URI-encoded string. Replaces '+' with space. + * Invalid/incomplete percent sequences are replaced with U+FFFD. + * Resulting octets are decoded as UTF-8; invalid UTF-8 is replaced with U+FFFD. + * XML-invalid codepoints are replaced with U+FFFD. 
+ */ +public class FnDecodeFromUri extends BasicFunction { + + private static final char REPLACEMENT = '\uFFFD'; + + public static final FunctionSignature FN_DECODE_FROM_URI = new FunctionSignature( + new QName("decode-from-uri", Function.BUILTIN_FUNCTION_NS), + "Decodes a URI-encoded string.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, "The URI-encoded string to decode") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the decoded string")); + + public FnDecodeFromUri(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return new StringValue(this, ""); + } + + final String input = args[0].getStringValue(); + + // Phase 1: decode percent-encoding and '+' to bytes, collecting raw bytes + final ByteArrayOutputStream bytes = new ByteArrayOutputStream(input.length()); + final StringBuilder result = new StringBuilder(input.length()); + + int i = 0; + while (i < input.length()) { + final char c = input.charAt(i); + if (c == '+') { + // Flush any accumulated bytes first + flushBytes(bytes, result); + result.append(' '); + i++; + } else if (c == '%') { + // Try to read percent-encoded byte + if (i + 2 < input.length() && isAscii(input.charAt(i + 1))) { + // Two chars follow and first is ASCII — treat as percent triplet + final int hi = hexDigit(input.charAt(i + 1)); + final int lo = hexDigit(input.charAt(i + 2)); + if (hi >= 0 && lo >= 0) { + bytes.write((hi << 4) | lo); + i += 3; + } else { + // Invalid hex pair: consume all 3 chars, produce one replacement + flushBytes(bytes, result); + result.append(REPLACEMENT); + i += 3; + } + } else if (i + 1 < input.length()) { + // First char after % is non-ASCII, or only 1 char follows + // Consume % + next char, produce replacement + flushBytes(bytes, 
result); + result.append(REPLACEMENT); + i += 2; + } else { + // % at end of string + flushBytes(bytes, result); + result.append(REPLACEMENT); + i++; + } + } else { + flushBytes(bytes, result); + result.append(c); + i++; + } + } + flushBytes(bytes, result); + + // Phase 2: replace XML-invalid codepoints (handle surrogate pairs for supplementary chars) + final StringBuilder cleaned = new StringBuilder(result.length()); + for (int j = 0; j < result.length(); j++) { + final char ch = result.charAt(j); + if (Character.isHighSurrogate(ch) && j + 1 < result.length() + && Character.isLowSurrogate(result.charAt(j + 1))) { + // Valid surrogate pair = supplementary character (valid in XML 1.0 4th+ edition) + cleaned.append(ch); + cleaned.append(result.charAt(++j)); + } else if (isXmlValid(ch)) { + cleaned.append(ch); + } else { + cleaned.append(REPLACEMENT); + } + } + + return new StringValue(this, cleaned.toString()); + } + + /** + * Flush accumulated bytes as UTF-8, replacing invalid sequences with U+FFFD. 
+ */ + private void flushBytes(final ByteArrayOutputStream bytes, final StringBuilder result) { + if (bytes.size() == 0) { + return; + } + final byte[] data = bytes.toByteArray(); + bytes.reset(); + + final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .replaceWith("\uFFFD"); + + final ByteBuffer bb = ByteBuffer.wrap(data); + final CharBuffer cb = CharBuffer.allocate(data.length * 2); + decoder.decode(bb, cb, true); + decoder.flush(cb); + cb.flip(); + result.append(cb); + } + + private static int hexDigit(final char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; + } + + private static boolean isAscii(final char c) { + return c <= 0x7F; + } + + private static boolean isXmlValid(final char c) { + return c == 0x9 || c == 0xA || c == 0xD || + (c >= 0x20 && c <= 0xD7FF) || + (c >= 0xE000 && c <= 0xFFFD); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDeepEqualOptions.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDeepEqualOptions.java new file mode 100644 index 00000000000..05973da16c1 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDeepEqualOptions.java @@ -0,0 +1,84 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +/** + * Implements XQuery 4.0 fn:deep-equal with options parameter (string or map). + * + * Accepts either a collation URI string (XQ3.1 compatible) or an options + * map (XQ4.0) as the 3rd parameter. When an options map is provided, + * validates all option keys/values and uses the options-aware comparison + * engine in {@link DeepEqualOptions}. + */ +public class FnDeepEqualOptions extends BasicFunction { + + public static final FunctionSignature FN_DEEP_EQUAL_OPTIONS = new FunctionSignature( + new QName("deep-equal", Function.BUILTIN_FUNCTION_NS), + "Returns true() iff every item in $items-1 is deep-equal to the item " + + "at the same position in $items-2, using the specified options or collation. 
" + + "If both $items-1 and $items-2 are the empty sequence, returns true().", + new SequenceType[]{ + new FunctionParameterSequenceType("items-1", Type.ITEM, + Cardinality.ZERO_OR_MORE, "The first item sequence"), + new FunctionParameterSequenceType("items-2", Type.ITEM, + Cardinality.ZERO_OR_MORE, "The second item sequence"), + new FunctionParameterSequenceType("options", Type.ITEM, + Cardinality.ZERO_OR_ONE, "Collation URI string or options map") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, + "true() if the sequences are deep-equal, false() otherwise")); + + public FnDeepEqualOptions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence items1 = args[0]; + final Sequence items2 = args[1]; + + // Parse 3rd parameter: either string (collation) or map (options) + if (args.length > 2 && !args[2].isEmpty()) { + final Item optionsItem = args[2].itemAt(0); + if (optionsItem instanceof AbstractMapType) { + // XQ4: options map — parse, validate, and use options-aware comparison + final DeepEqualOptions options = DeepEqualOptions.parse( + (AbstractMapType) optionsItem, context); + return BooleanValue.valueOf(options.deepEqualsSeq(items1, items2)); + } else { + // XQ3.1 compat: string collation URI + final Collator collator = context.getCollator(optionsItem.getStringValue()); + return BooleanValue.valueOf(FunDeepEqual.deepEqualsSeq(items1, items2, collator)); + } + } + + // No 3rd parameter — use default comparison + final Collator collator = context.getDefaultCollator(); + return BooleanValue.valueOf(FunDeepEqual.deepEqualsSeq(items1, items2, collator)); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDistinctOrderedNodes.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDistinctOrderedNodes.java new file mode 100644 index 
00000000000..e8f6f151094 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDistinctOrderedNodes.java @@ -0,0 +1,71 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements XQuery 4.0 fn:distinct-ordered-nodes. + * + * Returns nodes in document order with duplicates removed, equivalent to + * the "/" operator's node deduplication behavior. 
+ */ +public class FnDistinctOrderedNodes extends BasicFunction { + + public static final FunctionSignature FN_DISTINCT_ORDERED_NODES = new FunctionSignature( + new QName("distinct-ordered-nodes", Function.BUILTIN_FUNCTION_NS), + "Returns nodes in document order with duplicates removed.", + new SequenceType[] { + new FunctionParameterSequenceType("nodes", Type.NODE, Cardinality.ZERO_OR_MORE, "The nodes to deduplicate and order") + }, + new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, "the deduplicated nodes in document order")); + + public FnDistinctOrderedNodes(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence nodes = args[0]; + if (nodes.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // ValueSequence with noDups=true handles both document ordering and deduplication + final ValueSequence result = new ValueSequence(true); + result.addAll(nodes); + result.removeDuplicates(); + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDivideDecimals.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDivideDecimals.java new file mode 100644 index 00000000000..0ebd7c732f0 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDivideDecimals.java @@ -0,0 +1,119 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.DecimalValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import org.exist.xquery.functions.map.MapType; + +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.Item; + +import java.math.BigDecimal; +import java.math.RoundingMode; + +/** + * Implements XQuery 4.0 fn:divide-decimals. + * + * fn:divide-decimals($value, $divisor, $precision?) returns a record with + * quotient and remainder fields. 
+ */ +public class FnDivideDecimals extends BasicFunction { + + public static final FunctionSignature[] FN_DIVIDE_DECIMALS = { + new FunctionSignature( + new QName("divide-decimals", Function.BUILTIN_FUNCTION_NS), + "Divides one decimal by another to specified precision, returning quotient and remainder.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The dividend"), + new FunctionParameterSequenceType("divisor", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The divisor"), + new FunctionParameterSequenceType("precision", Type.INTEGER, Cardinality.ZERO_OR_ONE, "Decimal precision (default: 0)") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "record with quotient and remainder")), + new FunctionSignature( + new QName("divide-decimals", Function.BUILTIN_FUNCTION_NS), + "Divides one decimal by another returning integer quotient and remainder.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The dividend"), + new FunctionParameterSequenceType("divisor", Type.DECIMAL, Cardinality.EXACTLY_ONE, "The divisor") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "record with quotient and remainder")) + }; + + public FnDivideDecimals(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final BigDecimal value = toBigDecimal(args[0].itemAt(0)); + final BigDecimal divisor = toBigDecimal(args[1].itemAt(0)); + + if (divisor.compareTo(BigDecimal.ZERO) == 0) { + throw new XPathException(this, ErrorCodes.FOAR0001, "Division by zero"); + } + + int precision = 0; + if (args.length > 2 && !args[2].isEmpty()) { + precision = (int) ((IntegerValue) args[2].itemAt(0)).getLong(); + } + + // Quotient: truncate toward zero to given precision + final BigDecimal 
quotient = value.divide(divisor, precision, RoundingMode.DOWN); + final BigDecimal remainder = value.subtract(quotient.multiply(divisor)); + + // Build result record (map) + final MapType result = new MapType(this, context); + result.add(new StringValue(this, "quotient"), new DecimalValue(this, quotient)); + result.add(new StringValue(this, "remainder"), new DecimalValue(this, remainder)); + + return result; + } + + private BigDecimal toBigDecimal(final Item item) throws XPathException { + final AtomicValue av = item.atomize(); + if (av instanceof DecimalValue) { + return ((DecimalValue) av).getValue(); + } + // xs:integer is a subtype of xs:decimal — use string to avoid long truncation + if (av instanceof IntegerValue) { + return new BigDecimal(av.getStringValue()); + } + // Fallback: convert to decimal + return ((DecimalValue) av.convertTo(Type.DECIMAL)).getValue(); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDuplicateValues.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDuplicateValues.java new file mode 100644 index 00000000000..356b53b6826 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnDuplicateValues.java @@ -0,0 +1,126 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:duplicate-values (XQuery 4.0). + * + * Returns the values that appear more than once in the input sequence. 
+ */ +public class FnDuplicateValues extends BasicFunction { + + public static final FunctionSignature[] FN_DUPLICATE_VALUES = { + new FunctionSignature( + new QName("duplicate-values", Function.BUILTIN_FUNCTION_NS), + "Returns those values that appear more than once in the input sequence.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input values") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "the duplicate values")), + new FunctionSignature( + new QName("duplicate-values", Function.BUILTIN_FUNCTION_NS), + "Returns those values that appear more than once in the input sequence, using the specified collation.", + new SequenceType[] { + new FunctionParameterSequenceType("values", Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "The input values"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ANY_ATOMIC_TYPE, Cardinality.ZERO_OR_MORE, "the duplicate values")) + }; + + public FnDuplicateValues(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence values = args[0]; + if (values.getItemCount() <= 1) { + return Sequence.EMPTY_SEQUENCE; + } + + final Collator collator = getCollator(args); + + // Use contextual equality (fn:compare = 0) per XQ4 spec + final java.util.List seen = new java.util.ArrayList<>(); + final java.util.List reported = new java.util.ArrayList<>(); + final ValueSequence result = new ValueSequence(); + + for (final SequenceIterator i = values.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final AtomicValue value = item.atomize(); + + boolean isDuplicate = false; + for (final AtomicValue prev : seen) { + if 
(FnAllEqualDifferent.contextuallyEqual(prev, value, collator)) { + isDuplicate = true; + break; + } + } + + if (isDuplicate) { + // Check if we already reported this value + boolean alreadyReported = false; + for (final AtomicValue rep : reported) { + if (FnAllEqualDifferent.contextuallyEqual(rep, value, collator)) { + alreadyReported = true; + break; + } + } + if (!alreadyReported) { + result.add(value); + reported.add(value); + } + } else { + seen.add(value); + } + } + return result; + } + + private Collator getCollator(final Sequence[] args) throws XPathException { + if (args.length > 1 && !args[1].isEmpty()) { + final String collationURI = args[1].getStringValue(); + return context.getCollator(collationURI, ErrorCodes.FOCH0002); + } + return context.getDefaultCollator(); + } + +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMap.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMap.java new file mode 100644 index 00000000000..5635586ee7a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMap.java @@ -0,0 +1,458 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; +import org.w3c.dom.*; + +import javax.xml.XMLConstants; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Implements XQuery 4.0 fn:element-to-map. + * + * Converts an element node to a map representation following the XQ4 spec rules + * for different content models (empty, simple, record, list, sequence, mixed). + */ +public class FnElementToMap extends BasicFunction { + + public static final FunctionSignature[] FN_ELEMENT_TO_MAP = { + new FunctionSignature( + new QName("element-to-map", Function.BUILTIN_FUNCTION_NS), + "Converts an element to a map representation.", + new SequenceType[]{ + new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.ZERO_OR_ONE, "The element to convert") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "The map representation")), + new FunctionSignature( + new QName("element-to-map", Function.BUILTIN_FUNCTION_NS), + "Converts an element to a map representation with options.", + new SequenceType[]{ + new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.ZERO_OR_ONE, "The element to convert"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "Options map") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "The map representation")) + }; + + private static final String DEFAULT_ATTR_MARKER = "@"; + private static final String DEFAULT_CONTENT_KEY = "#content"; + private static final String 
DEFAULT_COMMENT_KEY = "#comment"; + private static final String DEFAULT_NAME_FORMAT = "eqname"; + + public FnElementToMap(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final Node node = ((NodeValue) args[0].itemAt(0)).getNode(); + if (node.getNodeType() != Node.ELEMENT_NODE) { + throw new XPathException(this, ErrorCodes.XPTY0004, "Expected element node"); + } + + // Parse options + String nameFormat = DEFAULT_NAME_FORMAT; + String attrMarker = DEFAULT_ATTR_MARKER; + String contentKey = DEFAULT_CONTENT_KEY; + String commentKey = DEFAULT_COMMENT_KEY; + + if (args.length > 1 && !args[1].isEmpty()) { + final MapType options = (MapType) args[1].itemAt(0); + final Sequence nfSeq = options.get(new StringValue(this, "name-format")); + if (nfSeq != null && !nfSeq.isEmpty()) { + nameFormat = nfSeq.getStringValue(); + } + final Sequence amSeq = options.get(new StringValue(this, "attribute-marker")); + if (amSeq != null && !amSeq.isEmpty()) { + attrMarker = amSeq.getStringValue(); + } + final Sequence ckSeq = options.get(new StringValue(this, "content-key")); + if (ckSeq != null && !ckSeq.isEmpty()) { + contentKey = ckSeq.getStringValue(); + } + final Sequence cmSeq = options.get(new StringValue(this, "comment-key")); + if (cmSeq != null && !cmSeq.isEmpty()) { + commentKey = cmSeq.getStringValue(); + } + } + + final Options opts = new Options(nameFormat, attrMarker, contentKey, commentKey); + return convertElement((Element) node, opts); + } + + private MapType convertElement(final Element elem, final Options opts) throws XPathException { + final String elemName = formatName(elem, opts); + final Sequence value = convertContent(elem, opts); + + MapType result = new MapType(this, context); + result = (MapType) result.put(new StringValue(this, 
elemName), value); + return result; + } + + private Sequence convertContent(final Element elem, final Options opts) throws XPathException { + // Collect attributes (excluding xmlns and xsi:type) + final Map attrs = new LinkedHashMap<>(); + final NamedNodeMap attrNodes = elem.getAttributes(); + if (attrNodes != null) { + for (int i = 0; i < attrNodes.getLength(); i++) { + final Attr attr = (Attr) attrNodes.item(i); + final String attrName = attr.getName(); + // Skip namespace declarations and xsi:type + if (attrName.startsWith("xmlns") && (attrName.length() == 5 || attrName.charAt(5) == ':')) { + continue; + } + if ("xsi:type".equals(attrName)) { + continue; + } + if (attrName.equals("xsi:nil")) { + continue; + } + final String key = opts.attrMarker + formatAttrName(attr, opts); + attrs.put(key, attr.getValue()); + } + } + + // Check for xsi:nil + final String nilAttr = elem.getAttributeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "nil"); + if ("true".equals(nilAttr) || "1".equals(nilAttr)) { + if (attrs.isEmpty()) { + // Return fn:null() as QName + return new QNameValue(this, context, new QName("null", Function.BUILTIN_FUNCTION_NS, "fn")); + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put( + new StringValue(this, opts.contentKey), + new QNameValue(this, context, new QName("null", Function.BUILTIN_FUNCTION_NS, "fn"))); + return attrMap; + } + } + + // Collect child nodes (elements, text, comments, PIs) + final List children = new ArrayList<>(); + final NodeList childNodes = elem.getChildNodes(); + for (int i = 0; i < childNodes.getLength(); i++) { + final Node child = childNodes.item(i); + switch (child.getNodeType()) { + case Node.ELEMENT_NODE: + case Node.TEXT_NODE: + case Node.CDATA_SECTION_NODE: + case Node.COMMENT_NODE: + children.add(child); + break; + 
default: + break; + } + } + + // Classify content model + final boolean hasElements = children.stream().anyMatch(n -> n.getNodeType() == Node.ELEMENT_NODE); + final boolean hasTextContent = children.stream().anyMatch(n -> + (n.getNodeType() == Node.TEXT_NODE || n.getNodeType() == Node.CDATA_SECTION_NODE) + && !n.getTextContent().trim().isEmpty()); + final boolean hasComments = children.stream().anyMatch(n -> n.getNodeType() == Node.COMMENT_NODE); + + // Empty element + if (children.isEmpty() || (!hasElements && !hasTextContent && !hasComments)) { + if (attrs.isEmpty()) { + return new StringValue(this, ""); + } else { + // Empty-plus: attributes only, no #content key + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + return attrMap; + } + } + + // Simple text content (no child elements) + if (!hasElements && !hasComments) { + final String textContent = getTextContent(children); + if (attrs.isEmpty()) { + return new StringValue(this, textContent); + } else { + return buildAttrMap(attrs, new StringValue(this, textContent), opts); + } + } + + // Mixed content (has both text and element children) + if (hasTextContent && hasElements) { + return buildMixedContent(children, attrs, opts); + } + + // Element-only content — determine layout + final List childElements = new ArrayList<>(); + for (final Node child : children) { + if (child.getNodeType() == Node.ELEMENT_NODE) { + childElements.add((Element) child); + } + } + + // Check for comments interleaved with elements + if (hasComments && !hasElements) { + return buildMixedContent(children, attrs, opts); + } + + // Check if all children have the same name (list pattern) + final boolean allSameName = childElements.size() > 1 && + childElements.stream().allMatch(e -> + formatName(e, opts).equals(formatName(childElements.get(0), opts))); + + // Check if all children have 
unique names (record pattern) + final Map> groupedByName = new LinkedHashMap<>(); + for (final Element child : childElements) { + groupedByName.computeIfAbsent(formatName(child, opts), k -> new ArrayList<>()).add(child); + } + final boolean allUnique = groupedByName.values().stream().allMatch(l -> l.size() == 1); + + if (allSameName) { + // List layout: array of child values + return buildListContent(childElements, attrs, opts); + } else if (allUnique) { + // Record layout: map of child name → value + return buildRecordContent(childElements, attrs, children, opts); + } else { + // Sequence layout: array of child maps + return buildSequenceContent(children, attrs, opts); + } + } + + private Sequence buildAttrMap(final Map attrs, final Sequence contentValue, final Options opts) throws XPathException { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put(new StringValue(this, opts.contentKey), contentValue); + return attrMap; + } + + private Sequence buildListContent(final List children, final Map attrs, final Options opts) throws XPathException { + // Array of child content values + final List items = new ArrayList<>(); + for (final Element child : children) { + items.add(convertContent(child, opts)); + } + final ArrayType array = new ArrayType(this, context, items); + + if (attrs.isEmpty()) { + return array; + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put(new StringValue(this, opts.contentKey), array); + return attrMap; + } + } + + private Sequence buildRecordContent(final List childElements, final Map attrs, + final List allChildren, final Options opts) throws XPathException { + 
MapType recordMap = new MapType(this, context); + + // Add attributes first + for (final Map.Entry a : attrs.entrySet()) { + recordMap = (MapType) recordMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + + // Add comments if present + for (final Node child : allChildren) { + if (child.getNodeType() == Node.COMMENT_NODE) { + recordMap = (MapType) recordMap.put( + new StringValue(this, opts.commentKey), + new StringValue(this, child.getTextContent())); + } + } + + // Add child elements + for (final Element child : childElements) { + final String childName = formatName(child, opts); + final Sequence childValue = convertContent(child, opts); + recordMap = (MapType) recordMap.put(new StringValue(this, childName), childValue); + } + + return recordMap; + } + + private Sequence buildSequenceContent(final List children, final Map attrs, final Options opts) throws XPathException { + // Build array of child maps/values + final List items = new ArrayList<>(); + for (final Node child : children) { + if (child.getNodeType() == Node.ELEMENT_NODE) { + items.add(convertElement((Element) child, opts)); + } else if (child.getNodeType() == Node.TEXT_NODE || child.getNodeType() == Node.CDATA_SECTION_NODE) { + final String text = child.getTextContent(); + if (!text.trim().isEmpty()) { + items.add(new StringValue(this, text)); + } + } else if (child.getNodeType() == Node.COMMENT_NODE) { + MapType commentMap = new MapType(this, context); + commentMap = (MapType) commentMap.put( + new StringValue(this, opts.commentKey), + new StringValue(this, child.getTextContent())); + items.add(commentMap); + } + } + final ArrayType array = new ArrayType(this, context, items); + + if (attrs.isEmpty()) { + return array; + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) 
attrMap.put(new StringValue(this, opts.contentKey), array); + return attrMap; + } + } + + private Sequence buildMixedContent(final List children, final Map attrs, final Options opts) throws XPathException { + final List items = new ArrayList<>(); + for (final Node child : children) { + switch (child.getNodeType()) { + case Node.ELEMENT_NODE: + items.add(convertElement((Element) child, opts)); + break; + case Node.TEXT_NODE: + case Node.CDATA_SECTION_NODE: + final String text = child.getTextContent(); + if (!text.isEmpty()) { + items.add(new StringValue(this, text)); + } + break; + case Node.COMMENT_NODE: + MapType commentMap = new MapType(this, context); + commentMap = (MapType) commentMap.put( + new StringValue(this, opts.commentKey), + new StringValue(this, child.getTextContent())); + items.add(commentMap); + break; + default: + break; + } + } + final ArrayType array = new ArrayType(this, context, items); + + if (attrs.isEmpty()) { + return array; + } else { + MapType attrMap = new MapType(this, context); + for (final Map.Entry a : attrs.entrySet()) { + attrMap = (MapType) attrMap.put(new StringValue(this, a.getKey()), new StringValue(this, a.getValue())); + } + attrMap = (MapType) attrMap.put(new StringValue(this, opts.contentKey), array); + return attrMap; + } + } + + private String formatName(final Element elem, final Options opts) { + final String ns = elem.getNamespaceURI(); + final String local = elem.getLocalName() != null ? 
elem.getLocalName() : elem.getTagName(); + + switch (opts.nameFormat) { + case "eqname": + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + case "lexical": + final String prefix = elem.getPrefix(); + if (prefix != null && !prefix.isEmpty()) { + return prefix + ":" + local; + } + return local; + case "local": + return local; + default: + // Default to eqname + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + } + } + + private String formatAttrName(final Attr attr, final Options opts) { + final String ns = attr.getNamespaceURI(); + final String local = attr.getLocalName() != null ? attr.getLocalName() : attr.getName(); + + switch (opts.nameFormat) { + case "eqname": + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + case "lexical": + final String prefix = attr.getPrefix(); + if (prefix != null && !prefix.isEmpty()) { + return prefix + ":" + local; + } + return local; + case "local": + return local; + default: + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + } + } + + private static String getTextContent(final List children) { + final StringBuilder sb = new StringBuilder(); + for (final Node child : children) { + if (child.getNodeType() == Node.TEXT_NODE || child.getNodeType() == Node.CDATA_SECTION_NODE) { + sb.append(child.getTextContent()); + } + } + return sb.toString(); + } + + private static class Options { + final String nameFormat; + final String attrMarker; + final String contentKey; + final String commentKey; + + Options(final String nameFormat, final String attrMarker, final String contentKey, final String commentKey) { + this.nameFormat = nameFormat; + this.attrMarker = attrMarker; + this.contentKey = contentKey; + this.commentKey = commentKey; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMapPlan.java 
b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMapPlan.java new file mode 100644 index 00000000000..57afc1dc54e --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnElementToMapPlan.java @@ -0,0 +1,263 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; +import org.w3c.dom.*; + +import java.util.*; + +/** + * fn:element-to-map-plan($input as node()*) as map(*) + * + * Analyzes the structure of input elements and returns a plan map + * describing the layout of each element type encountered. + * + * Layout values: empty, empty-plus, simple, simple-plus, list, list-plus, + * record, mixed. 
+ */ +public class FnElementToMapPlan extends BasicFunction { + + public static final FunctionSignature FN_ELEMENT_TO_MAP_PLAN = new FunctionSignature( + new QName("element-to-map-plan", Function.BUILTIN_FUNCTION_NS), + "Analyzes the structure of input elements and returns a plan map.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.NODE, + Cardinality.ZERO_OR_MORE, "The input nodes to analyze") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, + "A map describing the element layouts")); + + public FnElementToMapPlan(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return new MapType(this, context); + } + + final MapType plan = new MapType(this, context); + final Set processed = new HashSet<>(); + + // Analyze each input node + for (final SequenceIterator iter = args[0].iterate(); iter.hasNext(); ) { + final Item item = iter.nextItem(); + if (item.getType() == Type.DOCUMENT) { + // For document nodes, analyze the document element + final Node docNode = ((NodeValue) item).getNode(); + analyzeNode(docNode, plan, processed); + } else if (Type.subTypeOf(item.getType(), Type.ELEMENT)) { + final Node elemNode = ((NodeValue) item).getNode(); + analyzeElement(elemNode, plan, processed); + } + } + + return plan; + } + + private void analyzeNode(final Node node, final MapType plan, final Set processed) throws XPathException { + final NodeList children = node.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + final Node child = children.item(i); + if (child.getNodeType() == Node.ELEMENT_NODE) { + analyzeElement(child, plan, processed); + } + } + } + + private void analyzeElement(final Node elem, final MapType plan, final Set processed) throws XPathException { + final String elemKey = getElementKey(elem); + if 
(processed.contains(elemKey)) { + return; // Already analyzed this element type + } + processed.add(elemKey); + + final MapType layoutMap = new MapType(this, context); + + // Determine layout + final boolean hasAttributes = hasSignificantAttributes(elem); + final List childElements = getChildElements(elem); + final boolean hasTextContent = hasSignificantTextContent(elem); + + if (childElements.isEmpty() && !hasTextContent) { + // Empty element + layoutMap.add(new StringValue("layout"), + new StringValue(hasAttributes ? "empty-plus" : "empty")); + } else if (childElements.isEmpty() && hasTextContent) { + // Simple content (text only) + final String type = detectContentType(elem); + layoutMap.add(new StringValue("layout"), + new StringValue(hasAttributes ? "simple-plus" : "simple")); + if (type != null) { + layoutMap.add(new StringValue("type"), new StringValue(type)); + } + } else if (!hasTextContent && allChildrenSameName(childElements)) { + // List of same-named elements + final String childName = getElementKey(childElements.get(0)); + layoutMap.add(new StringValue("layout"), + new StringValue(hasAttributes ? "list-plus" : "list")); + layoutMap.add(new StringValue("child"), new StringValue(childName)); + } else if (hasTextContent || hasMixedContent(elem)) { + // Mixed content + layoutMap.add(new StringValue("layout"), new StringValue("mixed")); + } else { + // Record (distinct child element names) + layoutMap.add(new StringValue("layout"), new StringValue("record")); + } + + plan.add(new StringValue(elemKey), layoutMap); + + // Analyze attribute types + if (hasAttributes) { + final NamedNodeMap attrs = elem.getAttributes(); + for (int i = 0; i < attrs.getLength(); i++) { + final Node attr = attrs.item(i); + final String attrName = attr.getLocalName() != null ? 
attr.getLocalName() : attr.getNodeName(); + final String ns = attr.getNamespaceURI(); + // Skip xmlns declarations + if ("http://www.w3.org/2000/xmlns/".equals(ns) || attrName.startsWith("xmlns")) { + continue; + } + final String attrKey = "@" + (ns != null && !ns.isEmpty() ? + "Q{" + ns + "}" + attrName : attrName); + if (!processed.contains(attrKey)) { + processed.add(attrKey); + final MapType attrMap = new MapType(this, context); + final String type = detectValueType(attr.getNodeValue()); + if (type != null) { + attrMap.add(new StringValue("type"), new StringValue(type)); + } + plan.add(new StringValue(attrKey), attrMap); + } + } + } + + // Recursively analyze child elements + for (final Node child : childElements) { + analyzeElement(child, plan, processed); + } + } + + private String getElementKey(final Node elem) { + final String ns = elem.getNamespaceURI(); + final String local = elem.getLocalName() != null ? elem.getLocalName() : elem.getNodeName(); + if (ns != null && !ns.isEmpty()) { + return "Q{" + ns + "}" + local; + } + return local; + } + + private boolean hasSignificantAttributes(final Node elem) { + final NamedNodeMap attrs = elem.getAttributes(); + if (attrs == null) return false; + for (int i = 0; i < attrs.getLength(); i++) { + final Node attr = attrs.item(i); + final String name = attr.getNodeName(); + if (!name.startsWith("xmlns")) { + return true; + } + } + return false; + } + + private List getChildElements(final Node elem) { + final List result = new ArrayList<>(); + final NodeList children = elem.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + if (children.item(i).getNodeType() == Node.ELEMENT_NODE) { + result.add(children.item(i)); + } + } + return result; + } + + private boolean hasSignificantTextContent(final Node elem) { + final NodeList children = elem.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + final Node child = children.item(i); + if (child.getNodeType() == Node.TEXT_NODE) { + final 
String text = child.getNodeValue(); + if (text != null && !text.trim().isEmpty()) { + return true; + } + } + } + return false; + } + + private boolean hasMixedContent(final Node elem) { + boolean hasElements = false; + boolean hasText = false; + final NodeList children = elem.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + final Node child = children.item(i); + if (child.getNodeType() == Node.ELEMENT_NODE) { + hasElements = true; + } else if (child.getNodeType() == Node.TEXT_NODE) { + if (child.getNodeValue() != null && !child.getNodeValue().trim().isEmpty()) { + hasText = true; + } + } + } + return hasElements && hasText; + } + + private boolean allChildrenSameName(final List children) { + if (children.isEmpty()) return false; + final String firstName = getElementKey(children.get(0)); + for (int i = 1; i < children.size(); i++) { + if (!firstName.equals(getElementKey(children.get(i)))) { + return false; + } + } + return true; + } + + private String detectContentType(final Node elem) { + final String text = elem.getTextContent(); + if (text == null || text.trim().isEmpty()) { + return null; + } + return detectValueType(text.trim()); + } + + private String detectValueType(final String value) { + if (value == null || value.isEmpty()) { + return null; + } + try { + Double.parseDouble(value); + return "numeric"; + } catch (final NumberFormatException e) { + // Not numeric + } + if ("true".equals(value) || "false".equals(value)) { + return "boolean"; + } + return null; // default: string (not annotated) + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnEverySome.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnEverySome.java new file mode 100644 index 00000000000..ee18e143012 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnEverySome.java @@ -0,0 +1,177 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * 
http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements XQuery 4.0 fn:every and fn:some. 
+ */ +public class FnEverySome extends BasicFunction { + + public static final FunctionSignature[] FN_EVERY = { + new FunctionSignature( + new QName("every", Function.BUILTIN_FUNCTION_NS), + "Returns true if every item in the input sequence matches the supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "The predicate function (defaults to fn:boolean#1)") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all items match")), + new FunctionSignature( + new QName("every", Function.BUILTIN_FUNCTION_NS), + "Returns true if every item in the input sequence has an effective boolean value of true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if all items are truthy")) + }; + + public static final FunctionSignature[] FN_SOME = { + new FunctionSignature( + new QName("some", Function.BUILTIN_FUNCTION_NS), + "Returns true if at least one item in the input sequence matches the supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "The predicate function (defaults to fn:boolean#1)") + }, + new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if any item matches")), + new FunctionSignature( + new QName("some", Function.BUILTIN_FUNCTION_NS), + "Returns true if at least one item in the input sequence has an effective boolean value of true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new 
FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if any item is truthy")) + }; + + private AnalyzeContextInfo cachedContextInfo; + + public FnEverySome(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + final boolean isEvery = isCalledAs("every"); + + // 1-arg overload: use effective boolean value + if (args.length == 1) { + return evalWithEBV(input, isEvery); + } + + // 2-arg overload: use predicate function (empty predicate = use EBV) + if (args[1].isEmpty()) { + return evalWithEBV(input, isEvery); + } + + if (input.isEmpty()) { + return BooleanValue.valueOf(isEvery); + } + + try (final FunctionReference ref = (FunctionReference) args[1].itemAt(0)) { + ref.analyze(cachedContextInfo); + final int arity = ref.getSignature().getArgumentCount(); + + // Validate arity: predicate must accept 0, 1, or 2 arguments + if (arity > 2) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Predicate function must accept 0, 1, or 2 arguments, but has arity " + arity); + } + + int pos = 1; + for (final SequenceIterator i = input.iterate(); i.hasNext(); pos++) { + final Item item = i.nextItem(); + final Sequence r = callPredicate(ref, item, pos, arity); + // XQ4: predicate must return xs:boolean (xs:untypedAtomic is coercible) + if (!r.isEmpty()) { + final int rType = r.itemAt(0).getType(); + if (rType != Type.BOOLEAN && rType != Type.UNTYPED_ATOMIC) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Predicate function must return xs:boolean, but returned " + + Type.getTypeName(rType)); + } + } + final boolean matches = !r.isEmpty() && 
r.effectiveBooleanValue(); + if (isEvery && !matches) { + return BooleanValue.FALSE; + } + if (!isEvery && matches) { + return BooleanValue.TRUE; + } + } + return BooleanValue.valueOf(isEvery); + } + } + + private Sequence evalWithEBV(final Sequence input, final boolean isEvery) throws XPathException { + if (input.isEmpty()) { + return BooleanValue.valueOf(isEvery); + } + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final boolean ebv = item.toSequence().effectiveBooleanValue(); + if (isEvery && !ebv) { + return BooleanValue.FALSE; + } + if (!isEvery && ebv) { + return BooleanValue.TRUE; + } + } + return BooleanValue.valueOf(isEvery); + } + + private Sequence callPredicate(final FunctionReference ref, final Item item, final int pos, final int arity) throws XPathException { + if (arity == 0) { + return ref.evalFunction(null, null, new Sequence[0]); + } else if (arity == 1) { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence()}); + } else { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence(), new IntegerValue(this, pos)}); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnExpandedQName.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnExpandedQName.java new file mode 100644 index 00000000000..7dc2190314a --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnExpandedQName.java @@ -0,0 +1,74 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.QNameValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +/** + * Implements fn:expanded-QName (XQuery 4.0). + * + * Returns a string in Q{uri}local format for a QName value. 
+ */ +public class FnExpandedQName extends BasicFunction { + + public static final FunctionSignature FN_EXPANDED_QNAME = new FunctionSignature( + new QName("expanded-QName", Function.BUILTIN_FUNCTION_NS), + "Returns the expanded QName in Q{uri}local notation.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.QNAME, Cardinality.ZERO_OR_ONE, + "The QName value") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_ONE, + "the expanded QName string in Q{uri}local format")); + + public FnExpandedQName(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final QNameValue qnameVal = (QNameValue) args[0].itemAt(0); + final QName qname = qnameVal.getQName(); + + final String ns = qname.getNamespaceURI() != null ? qname.getNamespaceURI() : ""; + final String local = qname.getLocalPart(); + + return new StringValue(this, "Q{" + ns + "}" + local); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java index 2ade21d3117..c39b28f29a6 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatDates.java @@ -21,12 +21,17 @@ */ package org.exist.xquery.functions.fn; +import com.ibm.icu.text.MessageFormat; +import com.ibm.icu.text.RuleBasedNumberFormat; +import org.apache.commons.lang3.StringUtils; import org.exist.dom.QName; import org.exist.xquery.*; import org.exist.xquery.util.NumberFormatter; import org.exist.xquery.value.*; +import java.util.ArrayList; import java.util.Calendar; +import java.util.List; import java.util.Locale; import java.util.Optional; import java.util.TimeZone; @@ -152,6 +157,7 @@ 
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce final String picture = args[1].getStringValue(); final String language; final Optional place; + String calendar = null; if (getArgumentCount() == 5) { if (args[2].hasOne()) { language = args[2].getStringValue(); @@ -159,6 +165,10 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce language = context.getDefaultLanguage(); } + if (args[3].hasOne()) { + calendar = args[3].getStringValue(); + } + if(args[4].hasOne()) { place = Optional.of(args[4].getStringValue()); } else { @@ -169,6 +179,32 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce place = Optional.empty(); } + // Validate calendar parameter + if (calendar != null) { + if (calendar.startsWith(":")) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Invalid calendar name: " + calendar); + } + if (calendar.startsWith("Q{}")) { + final String localPart = calendar.substring(3); + if (localPart.isEmpty() || !Character.isLetter(localPart.charAt(0))) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Invalid calendar name: " + calendar); + } + if (!isKnownCalendar(localPart)) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Unknown calendar: " + calendar); + } + } else if (calendar.startsWith("Q{") && calendar.contains("}")) { + // EQName with non-empty namespace: accept with fallback + } else if (calendar.contains(":")) { + // Prefixed QName: accept with fallback + } else if (!isKnownCalendar(calendar)) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Unknown calendar: " + calendar); + } + } + return new StringValue(this, formatDate(picture, value, language, place)); } @@ -214,6 +250,8 @@ private String formatDate(String pic, AbstractDateTimeValue dt, final String lan private void formatComponent(String component, AbstractDateTimeValue dt, final String language, final Optional place, final boolean tzHMZNPictureHint, final StringBuilder 
sb) throws XPathException { + // Per spec, whitespace within a variable marker is insignificant + component = component.replaceAll("\\s+", ""); final Matcher matcher = componentPattern.matcher(component); if (!matcher.matches()) { throw new XPathException(this, ErrorCodes.FOFD1340, "Unrecognized date/time component: " + component); @@ -349,8 +387,8 @@ private void formatComponent(String component, AbstractDateTimeValue dt, final S break; case 'f': if (allowTime) { - final int fraction = dt.getPart(AbstractDateTimeValue.MILLISECOND); - formatNumber(specifier, picture, width, fraction, language, sb); + final int millis = dt.getPart(AbstractDateTimeValue.MILLISECOND); + formatFractionalSeconds(millis, picture, width, sb); } else { throw new XPathException(this, ErrorCodes.FOFD1350, "format-date does not support a fractional seconds component"); @@ -384,85 +422,255 @@ private void formatComponent(String component, AbstractDateTimeValue dt, final S sb.append(formatTimeZone(picture, dtv.getPart(DurationValue.HOUR), minute, cal.getTimeZone(), language, place)); + } else if ("Z".equals(picture)) { + // Military timezone: J = local time (no timezone specified) + sb.append("J"); } break; + case 'E': + if (allowDate) { + final int year = dt.getPart(AbstractDateTimeValue.YEAR); + sb.append(year >= 0 ? 
"AD" : "BC"); + } else { + throw new XPathException(this, ErrorCodes.FOFD1350, + "format-time does not support an era component"); + } + break; + case 'C': + sb.append("AD"); + break; default: throw new XPathException(this, ErrorCodes.FOFD1340, "Unrecognized date/time component: " + component); } } - private String formatTimeZone(final String timezonePicture, final int hour, final int minute, + private String formatTimeZone(String timezonePicture, final int hour, final int minute, final TimeZone timeZone, final String language, final Optional place) { - final Locale locale = new Locale(language); + // Military timezone letter + if ("Z".equals(timezonePicture)) { + return formatMilitaryTimeZone(hour, minute); + } - final String format; - switch(timezonePicture) { - case "0": - if(minute != 0) { - format = "%+d:%02d"; + // Named timezone + if ("N".equals(timezonePicture)) { + final Locale locale = new Locale(language); + final TimeZone tz = place.map(TimeZone::getTimeZone).orElse(timeZone); + return tz.getDisplayName(timeZone.useDaylightTime(), TimeZone.SHORT, locale); + } + + // Check for 't' modifier (use "Z" for UTC) + final boolean useZForUTC = timezonePicture.endsWith("t"); + if (useZForUTC) { + timezonePicture = timezonePicture.substring(0, timezonePicture.length() - 1); + } + if (useZForUTC && hour == 0 && minute == 0) { + return "Z"; + } + + // Parse the picture: find digit family, separator, hour/minute digit counts + int zero = '0'; + boolean zeroFound = false; + int hourDigits = 0; + int minuteDigits = 0; + String separator = null; + + for (int i = 0; i < timezonePicture.length(); i++) { + final int ch = timezonePicture.codePointAt(i); + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { + if (!zeroFound) { zero = family; zeroFound = true; } + if (separator == null) { hourDigits++; } else { minuteDigits++; } + } else if (ch == '#') { + if (separator == null) { hourDigits++; } else { minuteDigits++; } + } else 
if (separator == null && hourDigits > 0) { + separator = new String(Character.toChars(ch)); + } + if (Character.isSupplementaryCodePoint(ch)) { i++; } + } + + final int absHour = Math.abs(hour); + final String sign = (hour < 0) ? "-" : "+"; + final StringBuilder result = new StringBuilder(sign); + + if (separator != null && minuteDigits > 0) { + result.append(padWithDigitFamily(absHour, hourDigits, zero)); + result.append(separator); + result.append(padWithDigitFamily(minute, minuteDigits, zero)); + } else if (hourDigits >= 3) { + result.append(padWithDigitFamily(absHour * 100 + minute, hourDigits, zero)); + } else { + result.append(padWithDigitFamily(absHour, hourDigits, zero)); + if (minute != 0) { + result.append(":"); + result.append(padWithDigitFamily(minute, 2, zero)); + } + } + + return result.toString(); + } + + private static String padWithDigitFamily(int value, int minDigits, int zero) { + String s = Integer.toString(value); + while (s.length() < minDigits) { s = "0" + s; } + if (zero != '0') { + final StringBuilder converted = new StringBuilder(); + for (int i = 0; i < s.length(); i++) { + final char ch = s.charAt(i); + if (ch >= '0' && ch <= '9') { + converted.appendCodePoint(zero + (ch - '0')); } else { - format = "%+d"; + converted.append(ch); } - break; + } + return converted.toString(); + } + return s; + } - case "0000": - format = "%+03d%02d"; - break; + // Military timezone: Z(0), A-I(+1 to +9), K-M(+10 to +12), N-Y(-1 to -12) + // J is reserved for local time (no timezone) and is NOT in this array + private final static char[] MILITARY_TZ_CHARS = {'Z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', + 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y' }; - case "0:00": - format = "%+d:%02d"; - break; + private String formatMilitaryTimeZone(final int hour, final int minute) { + if (minute == 0 && hour >= -12 && hour <= 12) { + final int offset = (hour < 0) ? 
12 + (hour * -1) : hour; + return String.valueOf(MILITARY_TZ_CHARS[offset]); + } else { + return String.format("%+03d:%02d", hour, minute); + } + } + + /** + * Format fractional seconds as left-aligned digits. + * Unlike regular integer formatting, fractional seconds treat the value + * as a fraction (0.456) where digits are extracted left-to-right. + */ + private void formatFractionalSeconds(int millis, String picture, String width, + StringBuilder sb) throws XPathException { + // Build the fractional digit string, left-aligned, padded to 3 digits + String fracDigits = String.format("%03d", millis); + + // Count actual digit positions in picture (ignoring separators and modifiers) + int picMin = 0; + int picMax = 0; + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { + picMin++; + picMax++; + } else if (ch == '#') { + picMax++; + } + } + + int min = picMin; + // A multi-digit picture constrains max precision; single-digit is unbounded + final boolean pictureSetsMax = (picMax > 1); + int max = pictureSetsMax ? 
picMax : Integer.MAX_VALUE; - case "00:00t": - if(hour == 0 && minute == 0) { - format = "Z"; + // Width specifier + final int[] widths = getWidths(width); + if (widths != null) { + if (widths[0] > 0) { min = Math.max(picMin, widths[0]); } + if (widths[1] > 0) { + if (pictureSetsMax) { + max = Math.max(picMax, widths[1]); } else { - format = "%+03d:%02d"; + max = widths[1]; } - break; + } + } + if (max < min) { max = min; } - case "N": - final TimeZone tz = place.map(TimeZone::getTimeZone).orElse(timeZone); - return tz.getDisplayName(timeZone.useDaylightTime(), TimeZone.SHORT, locale); + // Pad to min with trailing zeros + while (fracDigits.length() < min) { + fracDigits += "0"; + } - case "Z": - return formatMilitaryTimeZone(hour, minute); + // Truncate to max precision + if (fracDigits.length() > max) { + fracDigits = fracDigits.substring(0, max); + } - case "00:00": - default: - format = "%+03d:%02d"; + // Remove trailing zeros beyond min (variable-width output) + while (fracDigits.length() > min && fracDigits.endsWith("0")) { + fracDigits = fracDigits.substring(0, fracDigits.length() - 1); + } + + // Apply digit family from picture (e.g., Arabic-Indic digits) + final int digitSign = getFirstDigitInPicture(picture); + if (digitSign >= 0) { + final int zero = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(digitSign); + if (zero != '0') { + final StringBuilder converted = new StringBuilder(); + for (int i = 0; i < fracDigits.length(); i++) { + final char ch = fracDigits.charAt(i); + if (ch >= '0' && ch <= '9') { + converted.append((char)(zero + (ch - '0'))); + } else { + converted.append(ch); + } + } + fracDigits = converted.toString(); + } + } + + // Insert grouping separators from picture if present + if (hasGroupingSeparators(picture)) { + fracDigits = applyGroupingSeparators(fracDigits, picture); } - return String.format(locale, format, hour, minute); + sb.append(fracDigits); } - private final static char[] MILITARY_TZ_CHARS = {'Z', 'A', 'B', 'C', 'D', 
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', - 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y' }; + private static int getFirstDigitInPicture(String picture) { + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if (ch != '#' && ch != 'o' && ch != 'c') { + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { + return ch; + } + } + } + return -1; + } - /** - * Military time zone - * - * Z = +00:00, A = +01:00, B = +02:00, ..., M = +12:00, N = -01:00, O = -02:00, ... Y = -12:00. - * - * The letter J (meaning local time) is used in the case of a value that does not specify a timezone - * offset. - * - * Timezone offsets that have no representation in this system (for example Indian Standard Time, +05:30) - * are output as if the format 01:01 had been requested. - */ - private String formatMilitaryTimeZone(final int hour, final int minute) { - if(minute == 0 && hour > -12 && hour < 12) { - final int offset; - if(hour < 0) { - offset = 13 + (hour * -1); + private static boolean hasGroupingSeparators(String picture) { + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family < 0 && ch != '#') { + return true; + } + } + return false; + } + + private static String applyGroupingSeparators(String digits, String picture) { + final StringBuilder result = new StringBuilder(); + int digitIdx = 0; + for (int i = 0; i < picture.length() && digitIdx < digits.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0 || ch == '#') { + result.append(digits.charAt(digitIdx)); + digitIdx++; } else { - offset = hour; + result.append(ch); } - return 
String.valueOf(MILITARY_TZ_CHARS[offset]); - } else { - return String.format("%+03d:%02d", hour, minute); } + while (digitIdx < digits.length()) { + result.append(digits.charAt(digitIdx)); + digitIdx++; + } + return result.toString(); } private String getDefaultFormat(char specifier) { @@ -512,6 +720,80 @@ private void formatNumber(char specifier, String picture, String width, int num, return; } + // Word formatting: W (uppercase), w (lowercase), Ww (title case) + // With optional ordinal modifier: Wo, wo, Wwo + final String basePicture = picture.endsWith("o") ? picture.substring(0, picture.length() - 1) : picture; + final boolean ordinalWords = picture.endsWith("o") && (basePicture.equals("W") || basePicture.equals("w") || basePicture.equals("Ww")); + if ("W".equals(basePicture) || "w".equals(basePicture) || "Ww".equals(basePicture)) { + final Locale locale = new Locale(language); + final String spelloutRule = ordinalWords ? "%spellout-ordinal" : "%spellout-cardinal"; + + // Check if the rule exists, fall back to cardinal if ordinal not available + final RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat(locale, RuleBasedNumberFormat.SPELLOUT); + String ruleToUse = spelloutRule; + boolean ruleFound = false; + for (final String ruleName : rbnf.getRuleSetNames()) { + if (ruleName.equals(ruleToUse)) { + ruleFound = true; + break; + } + } + if (!ruleFound) { + ruleToUse = "%spellout-cardinal"; + } + + final MessageFormat fmt = new MessageFormat("{0,spellout," + ruleToUse + "}", locale); + String word = fmt.format(new Object[]{num}); + + if ("W".equals(basePicture)) { + word = word.toUpperCase(locale); + } else if ("Ww".equals(basePicture)) { + // Title case: capitalize each word + final String[] parts = word.split("((?<=[ -])|(?=[ -]))"); + final StringBuilder titled = new StringBuilder(); + for (final String part : parts) { + titled.append(StringUtils.capitalize(part)); + } + word = titled.toString(); + } + // "w" is already lowercase from ICU4J + + 
sb.append(word); + return; + } + + // Roman numeral formatting: I (uppercase), i (lowercase) + if ("I".equals(picture) || "i".equals(picture)) { + String roman = toRoman(Math.abs(num)); + if ("i".equals(picture)) { + roman = roman.toLowerCase(); + } + sb.append(roman); + return; + } + + // Handle grouping separators in numeric pictures (e.g., [Y9;999], [Y9,999,*]) + if (hasGroupingSeparators(picture)) { + sb.append(formatWithGroupingSeparators(num, picture)); + return; + } + + // Validate optional digit placement: # must precede mandatory digits, not follow + boolean seenMandatory = false; + for (int i = 0; i < picture.length(); i++) { + final char ch = picture.charAt(i); + if ((ch == 'o' || ch == 'c') && i == picture.length() - 1) { break; } + if (ch == '#') { + if (seenMandatory) { + throw new XPathException(this, ErrorCodes.FOFD1340, + "Optional digit '#' must not appear after mandatory digits in: " + picture); + } + } else { + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0) { seenMandatory = true; } + } + } + // determine min and max width int min = NumberFormatter.getMinDigits(picture); int max = NumberFormatter.getMaxDigits(picture); @@ -531,6 +813,83 @@ private void formatNumber(char specifier, String picture, String width, int num, } } + private static final int[] ROMAN_VALUES = {1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1}; + private static final String[] ROMAN_SYMBOLS = {"M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"}; + + private static String toRoman(int num) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < ROMAN_VALUES.length; i++) { + while (num >= ROMAN_VALUES[i]) { + sb.append(ROMAN_SYMBOLS[i]); + num -= ROMAN_VALUES[i]; + } + } + return sb.toString(); + } + + private static String formatWithGroupingSeparators(int num, String picture) { + String pic = picture; + if (pic.endsWith("o") || pic.endsWith("c")) { pic = pic.substring(0, pic.length() - 1); 
} + if (pic.endsWith(",*")) { pic = pic.substring(0, pic.length() - 2); } + + int zero = '0'; + for (int i = 0; i < pic.length(); i++) { + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(pic.charAt(i)); + if (family >= 0) { zero = family; break; } + } + + // Map separator positions (counted from the right) + final List sepPositions = new ArrayList<>(); + final List sepChars = new ArrayList<>(); + int digitCount = 0; + for (int i = pic.length() - 1; i >= 0; i--) { + final char ch = pic.charAt(i); + final int family = net.sf.saxon.expr.number.Alphanumeric.getDigitFamily(ch); + if (family >= 0 || ch == '#') { + digitCount++; + } else { + sepPositions.add(digitCount); + sepChars.add(ch); + } + } + + final String digits = Integer.toString(num); + final StringBuilder result = new StringBuilder(); + int digitIdx = digits.length() - 1; + int pos = 0; + while (digitIdx >= 0) { + for (int s = 0; s < sepPositions.size(); s++) { + if (sepPositions.get(s) == pos && pos > 0) { + result.insert(0, sepChars.get(s)); + } + } + result.insert(0, digits.charAt(digitIdx)); + digitIdx--; + pos++; + } + + if (zero != '0') { + final StringBuilder converted = new StringBuilder(); + for (int i = 0; i < result.length(); i++) { + final char ch = result.charAt(i); + if (ch >= '0' && ch <= '9') { + converted.append((char)(zero + (ch - '0'))); + } else { + converted.append(ch); + } + } + return converted.toString(); + } + return result.toString(); + } + + private static boolean isKnownCalendar(final String calendar) { + return switch (calendar.toUpperCase()) { + case "AD", "ISO", "OS", "NS" -> true; + default -> false; + }; + } + private int[] getWidths(String width) throws XPathException { if (width == null || width.isEmpty()) {return null;} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatNumbers.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatNumbers.java index 3633d2c71fc..7be81fbf44d 100644 --- 
a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatNumbers.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFormatNumbers.java @@ -83,6 +83,7 @@ import org.exist.dom.QName; import org.exist.util.CodePointString; import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; import org.exist.xquery.value.*; import javax.annotation.Nullable; @@ -125,7 +126,7 @@ public class FnFormatNumbers extends BasicFunction { arity( FS_PARAM_VALUE, FS_PARAM_PICTURE, - optParam("decimal-format-name", Type.STRING, "The name (as an EQName) of a decimal format to use.") + optParam("options", Type.ITEM, "The name (as an EQName) of a decimal format, or a map of formatting options (XQuery 4.0).") ) ) ); @@ -138,22 +139,8 @@ public FnFormatNumbers(final XQueryContext context, final FunctionSignature sign public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { - // get the decimal format - final QName qnDecimalFormat; - if (args.length == 3 && !args[2].isEmpty()) { - final String decimalFormatName = args[2].itemAt(0).getStringValue().trim(); - try { - qnDecimalFormat = QName.parse(context, decimalFormatName); - } catch (final QName.IllegalQNameException e) { - throw new XPathException(this, ErrorCodes.FODF1280, "Invalid decimal format QName.", args[2], e); - } - } else { - qnDecimalFormat = null; - } - final DecimalFormat decimalFormat = context.getStaticDecimalFormat(qnDecimalFormat); - if (decimalFormat == null) { - throw new XPathException(this, ErrorCodes.FODF1280, "No known decimal format of that name.", args[2]); - } + // Resolve decimal format from the options argument (XQ4: string or map) + final DecimalFormat decimalFormat = resolveDecimalFormat(args); final NumericValue number; if (args[0].isEmpty()) { @@ -171,6 +158,145 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) return new StringValue(this, value); } + /** + * Resolves the decimal format from the 3rd 
argument. + * XQ3.1: absent or xs:string (decimal format name). + * XQ4: map(*) with formatting properties and optional format-name. + */ + private DecimalFormat resolveDecimalFormat(final Sequence[] args) throws XPathException { + if (args.length < 3 || args[2].isEmpty()) { + // No options — use unnamed default + final DecimalFormat df = context.getStaticDecimalFormat(null); + if (df == null) { + throw new XPathException(this, ErrorCodes.FODF1280, "No unnamed decimal format in static context."); + } + return df; + } + + final Item optionsItem = args[2].itemAt(0); + + if (optionsItem instanceof MapType) { + // XQ4 map overload + return resolveDecimalFormatFromMap((MapType) optionsItem); + } + + // XQ3.1 string overload (decimal format name) + final String decimalFormatName = optionsItem.getStringValue().trim(); + final QName qnDecimalFormat; + try { + qnDecimalFormat = QName.parse(context, decimalFormatName); + } catch (final QName.IllegalQNameException e) { + throw new XPathException(this, ErrorCodes.FODF1280, "Invalid decimal format QName.", args[2], e); + } + final DecimalFormat df = context.getStaticDecimalFormat(qnDecimalFormat); + if (df == null) { + throw new XPathException(this, ErrorCodes.FODF1280, "No known decimal format of that name.", args[2]); + } + return df; + } + + /** + * Resolves a decimal format from an XQ4 options map. + * The map can contain format-name (to select a base format) and + * individual property overrides (decimal-separator, grouping-separator, etc.). + * + * Properties use the char:rendition pattern — a single character is both + * marker and rendition; "char:string" splits marker from rendition. + * For this implementation, only the marker (first character) is used for + * picture string analysis; the rendition is used for output formatting. 
+ */ + private DecimalFormat resolveDecimalFormatFromMap(final MapType map) throws XPathException { + // Start with the named or unnamed base format + final Sequence formatNameSeq = map.get(new StringValue(this, "format-name")); + DecimalFormat base; + if (formatNameSeq != null && !formatNameSeq.isEmpty()) { + final String formatName = formatNameSeq.itemAt(0).getStringValue().trim(); + final QName qn; + try { + qn = QName.parse(context, formatName); + } catch (final QName.IllegalQNameException e) { + throw new XPathException(this, ErrorCodes.FODF1280, "Invalid format-name in options map.", formatNameSeq, e); + } + base = context.getStaticDecimalFormat(qn); + if (base == null) { + throw new XPathException(this, ErrorCodes.FODF1280, "No known decimal format: " + formatName); + } + } else { + base = context.getStaticDecimalFormat(null); + if (base == null) { + base = DecimalFormat.UNNAMED; + } + } + + // Override individual properties from the map, extracting char:rendition + final CharRendition decSep = getCharRenditionProperty(map, "decimal-separator", base.decimalSeparator); + final CharRendition grpSep = getCharRenditionProperty(map, "grouping-separator", base.groupingSeparator); + final CharRendition expSep = getCharRenditionProperty(map, "exponent-separator", base.exponentSeparator); + final CharRendition pct = getCharRenditionProperty(map, "percent", base.percent); + final CharRendition pml = getCharRenditionProperty(map, "per-mille", base.perMille); + final int zeroDigit = getCharProperty(map, "zero-digit", base.zeroDigit); + final int digit = getCharProperty(map, "digit", base.digit); + final int patternSeparator = getCharProperty(map, "pattern-separator", base.patternSeparator); + final int minusSign = getCharProperty(map, "minus-sign", base.minusSign); + final String infinity = getStringProperty(map, "infinity", base.infinity); + final String nan = getStringProperty(map, "NaN", base.NaN); + + return new DecimalFormat(decSep.marker(), expSep.marker(), 
grpSep.marker(), + pct.marker(), pml.marker(), zeroDigit, digit, patternSeparator, infinity, nan, minusSign, + decSep.rendition(), expSep.rendition(), grpSep.rendition(), + pct.rendition(), pml.rendition()); + } + + /** + * Result of parsing a char:rendition property value. + * Marker is used for picture string parsing; rendition for output. + */ + private record CharRendition(int marker, String rendition) {} + + /** + * Extracts a single-character property from the map, handling the + * char:rendition pattern. Returns marker (first char) and rendition. + * If the property is absent, returns the default marker with null rendition. + */ + private CharRendition getCharRenditionProperty(final MapType map, final String key, final int defaultValue) throws XPathException { + final Sequence seq = map.get(new StringValue(this, key)); + if (seq == null || seq.isEmpty()) { + return new CharRendition(defaultValue, null); + } + final String value = seq.itemAt(0).getStringValue(); + if (value.isEmpty()) { + throw new XPathException(this, ErrorCodes.FODF1280, + "Decimal format property '" + key + "' must not be empty."); + } + final int marker = value.codePointAt(0); + final int markerLen = Character.charCount(marker); + // char:rendition pattern: "X:rendition" where X is the marker + if (value.length() > markerLen && value.charAt(markerLen) == ':') { + final String rendition = value.substring(markerLen + 1); + return new CharRendition(marker, rendition); + } + return new CharRendition(marker, null); + } + + /** + * Extracts a single-character property (no rendition support). + */ + private int getCharProperty(final MapType map, final String key, final int defaultValue) throws XPathException { + return getCharRenditionProperty(map, key, defaultValue).marker(); + } + + /** + * Extracts a string property from the map. + * If absent, returns the default. 
+ */ + private String getStringProperty(final MapType map, final String key, final String defaultValue) throws XPathException { + final Sequence seq = map.get(new StringValue(this, key)); + if (seq == null || seq.isEmpty()) { + return defaultValue; + } + return seq.itemAt(0).getStringValue(); + } + enum AnalyzeState { MANTISSA_PART, INTEGER_PART, @@ -715,19 +841,58 @@ private String format(final NumericValue number, final DecimalFormat decimalForm if (minimumExponentSize > 0) { formatted.append(decimalFormat.exponentSeparator); - final CodePointString expStr = new CodePointString(String.valueOf(exp)); + // Handle negative exponents: pad the absolute value, then prepend sign + final boolean negativeExp = exp < 0; + final CodePointString expStr = new CodePointString(String.valueOf(Math.abs(exp))); final int expPadLen = subPicture.getMinimumExponentSize() - expStr.length(); if (expPadLen > 0) { expStr.leftPad(decimalFormat.zeroDigit, expPadLen); } + if (negativeExp) { + expStr.insert(0, decimalFormat.minusSign); + } + formatted.append(expStr); } // Rule 14 - concatenate prefix, formatted number, and suffix - final String result = subPicture.getPrefixString() + formatted + subPicture.getSuffixString(); + String result = subPicture.getPrefixString() + formatted + subPicture.getSuffixString(); + + // XQ4: Apply char:rendition substitutions — replace marker characters with + // their rendition strings in the final output + result = applyRenditions(result, decimalFormat); + + return result; + } + /** + * XQ4 char:rendition: replace marker characters with their rendition strings + * in the formatted output. Only applies when a rendition differs from the + * marker (i.e., the property was specified as "marker:rendition"). 
+ */ + private static String applyRenditions(String result, final DecimalFormat df) { + final String decMarker = new String(Character.toChars(df.decimalSeparator)); + if (!decMarker.equals(df.decimalSeparatorRendition)) { + result = result.replace(decMarker, df.decimalSeparatorRendition); + } + final String grpMarker = new String(Character.toChars(df.groupingSeparator)); + if (!grpMarker.equals(df.groupingSeparatorRendition)) { + result = result.replace(grpMarker, df.groupingSeparatorRendition); + } + final String expMarker = new String(Character.toChars(df.exponentSeparator)); + if (!expMarker.equals(df.exponentSeparatorRendition)) { + result = result.replace(expMarker, df.exponentSeparatorRendition); + } + final String pctMarker = new String(Character.toChars(df.percent)); + if (!pctMarker.equals(df.percentRendition)) { + result = result.replace(pctMarker, df.percentRendition); + } + final String pmlMarker = new String(Character.toChars(df.perMille)); + if (!pmlMarker.equals(df.perMilleRendition)) { + result = result.replace(pmlMarker, df.perMilleRendition); + } return result; } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFunctionAnnotations.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFunctionAnnotations.java new file mode 100644 index 00000000000..6bdfd09ceeb --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFunctionAnnotations.java @@ -0,0 +1,84 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; + +/** + * Implements fn:function-annotations (XQuery 4.0). + * + * Returns annotations on a function item as a sequence of single-entry maps, + * where each map has the annotation QName as key and annotation values as value. + */ +public class FnFunctionAnnotations extends BasicFunction { + + public static final FunctionSignature FN_FUNCTION_ANNOTATIONS = new FunctionSignature( + new QName("function-annotations", Function.BUILTIN_FUNCTION_NS), + "Returns the annotations of a function item as a sequence of single-entry maps.", + new SequenceType[]{ + new FunctionParameterSequenceType("function", Type.FUNCTION, + Cardinality.EXACTLY_ONE, "The function item to inspect") + }, + new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.ZERO_OR_MORE, + "A sequence of single-entry maps, one per annotation")); + + public FnFunctionAnnotations(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Item funcItem = args[0].itemAt(0); + if (!(funcItem instanceof FunctionReference ref)) { + return Sequence.EMPTY_SEQUENCE; + } + + final FunctionSignature sig = ref.getSignature(); + final Annotation[] annotations = sig.getAnnotations(); + if (annotations 
== null || annotations.length == 0) { + return Sequence.EMPTY_SEQUENCE; + } + + final ValueSequence result = new ValueSequence(annotations.length); + for (final Annotation ann : annotations) { + final MapType map = new MapType(this, context); + final QNameValue qnameKey = new QNameValue(this, context, ann.getName()); + + // Build annotation values sequence + final LiteralValue[] values = ann.getValue(); + if (values == null || values.length == 0) { + map.add(qnameKey, Sequence.EMPTY_SEQUENCE); + } else { + final ValueSequence valSeq = new ValueSequence(values.length); + for (final LiteralValue lv : values) { + valSeq.add(lv.getValue()); + } + map.add(qnameKey, valSeq); + } + result.add(map); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFunctionIdentity.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFunctionIdentity.java new file mode 100644 index 00000000000..e4c1fdadf07 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnFunctionIdentity.java @@ -0,0 +1,100 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +import java.util.IdentityHashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Implements fn:function-identity (XQuery 4.0). + * + * Returns a string that uniquely identifies a function item. Two calls with + * the same function return codepoint-equal strings; calls with different + * functions return different strings. + * + * For named functions, identity is based on QName + arity. + * For anonymous functions, maps, and arrays, identity is based on object identity. + */ +public class FnFunctionIdentity extends BasicFunction { + + /** Counter for assigning unique IDs to anonymous function items, maps, and arrays. */ + private static final AtomicLong ID_COUNTER = new AtomicLong(1); + + /** Identity-based map to ensure the same object always gets the same ID. + * Uses reference equality (==), not equals(), so structurally equal but + * distinct maps/arrays get different IDs per the spec. 
*/ + private static final Map IDENTITY_MAP = new IdentityHashMap<>(); + + private static synchronized long getOrAssignId(final Object obj) { + return IDENTITY_MAP.computeIfAbsent(obj, k -> ID_COUNTER.getAndIncrement()); + } + + public static final FunctionSignature FN_FUNCTION_IDENTITY = new FunctionSignature( + new QName("function-identity", Function.BUILTIN_FUNCTION_NS), + "Returns a string that uniquely identifies a function item.", + new SequenceType[]{ + new FunctionParameterSequenceType("function", Type.ITEM, + Cardinality.EXACTLY_ONE, "The function item to identify") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, + "A string uniquely identifying the function")); + + public FnFunctionIdentity(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Item funcItem = args[0].itemAt(0); + return new StringValue(this, computeIdentity(funcItem)); + } + + private static String computeIdentity(final Item item) throws XPathException { + if (item instanceof FunctionReference ref) { + final FunctionSignature sig = ref.getSignature(); + final QName name = sig.getName(); + if (name != null && name != InlineFunction.INLINE_FUNCTION_QNAME) { + // Named function: identity based on expanded QName + arity + return "Q{" + (name.getNamespaceURI() != null ? 
name.getNamespaceURI() : "") + + "}" + name.getLocalPart() + "#" + sig.getArgumentCount(); + } + // Anonymous function: use counter-based identity + return "anon@" + getOrAssignId(ref); + } + if (item instanceof AbstractMapType) { + // Each distinct map object gets a unique ID + return "map@" + getOrAssignId(item); + } + if (item instanceof ArrayType) { + return "array@" + getOrAssignId(item); + } + // Fallback for other function types + return "func@" + getOrAssignId(item); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGet.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGet.java new file mode 100644 index 00000000000..a5c1b7d57cc --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGet.java @@ -0,0 +1,91 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +/** + * fn:get($key as xs:anyAtomicType) as item()* + * + * XQuery 4.0 context-dependent lookup function. Looks up a value from + * the context item: + * - For arrays: returns the member at the given position + * - For maps: returns the value for the given key + * - For atomic values: returns the value itself (identity) + */ +public class FnGet extends BasicFunction { + + public static final FunctionSignature FN_GET = new FunctionSignature( + new QName("get", Function.BUILTIN_FUNCTION_NS), + "Looks up a value from the context item. For arrays, returns the member " + + "at the given position. 
For maps, returns the value for the given key.", + new SequenceType[] { + new FunctionParameterSequenceType("key", Type.ANY_ATOMIC_TYPE, + Cardinality.EXACTLY_ONE, "The lookup key or index") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, + "The looked-up value")); + + public FnGet(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + // Get the context item + Sequence ctxSeq = contextSequence; + if (ctxSeq == null || ctxSeq.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPDY0002, + "fn:get requires a context item"); + } + + final Item contextItem = ctxSeq.itemAt(0); + final AtomicValue key = (AtomicValue) args[0].itemAt(0); + + if (contextItem instanceof ArrayType) { + // Array lookup by position + final ArrayType array = (ArrayType) contextItem; + final int index = ((IntegerValue) key.convertTo(Type.INTEGER)).getInt(); + if (index < 1 || index > array.getSize()) { + throw new XPathException(this, ErrorCodes.FOAY0001, + "Array index " + index + " out of bounds (1.." + array.getSize() + ")"); + } + return array.get(index - 1); + } else if (contextItem instanceof AbstractMapType) { + // Map lookup by key + final AbstractMapType map = (AbstractMapType) contextItem; + final Sequence value = map.get(key); + return value != null ? 
value : Sequence.EMPTY_SEQUENCE; + } else if (contextItem instanceof FunctionReference) { + // Function application + final FunctionReference funcRef = (FunctionReference) contextItem; + return funcRef.evalFunction(null, null, new Sequence[]{key.toSequence()}); + } else { + // Atomic value: return the context item itself + return contextItem.toSequence(); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGraphemes.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGraphemes.java new file mode 100644 index 00000000000..45701961288 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnGraphemes.java @@ -0,0 +1,86 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import com.ibm.icu.text.BreakIterator; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:graphemes (XQuery 4.0). + * + * Splits the supplied string into a sequence of strings, each containing + * one Unicode extended grapheme cluster. + * + * Uses ICU4J's BreakIterator for Unicode grapheme cluster boundary detection, + * which handles combining marks, emoji sequences, regional indicators, etc. 
+ */ +public class FnGraphemes extends BasicFunction { + + public static final FunctionSignature FN_GRAPHEMES = new FunctionSignature( + new QName("graphemes", Function.BUILTIN_FUNCTION_NS), + "Splits the supplied string into a sequence of strings, each containing " + + "one Unicode extended grapheme cluster.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.STRING, Cardinality.ZERO_OR_ONE, + "The string to split into grapheme clusters") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, + "a sequence of strings, each containing one grapheme cluster")); + + public FnGraphemes(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + final String str = args[0].getStringValue(); + if (str.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final BreakIterator bi = BreakIterator.getCharacterInstance(); + bi.setText(str); + + final ValueSequence result = new ValueSequence(); + int start = bi.first(); + for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) { + result.add(new StringValue(this, str.substring(start, end))); + } + return result; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHash.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHash.java new file mode 100644 index 00000000000..47a02cb8b9e --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHash.java @@ -0,0 +1,177 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software 
Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+package org.exist.xquery.functions.fn;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.zip.CRC32;
+
+import org.bouncycastle.crypto.digests.Blake3Digest;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.ErrorCodes;
+import org.exist.xquery.Function;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.BinaryValue;
+import org.exist.xquery.value.BinaryValueFromBinaryString;
+import org.exist.xquery.value.FunctionParameterSequenceType;
+import org.exist.xquery.value.FunctionReturnSequenceType;
+import org.exist.xquery.value.HexBinaryValueType;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+/**
+ * Implements fn:hash (XQuery 4.0).
+ *
+ * Returns the result of a hash/checksum function applied to the input.
+ * Supports MD5, SHA-1, SHA-256, SHA-384, SHA-512, BLAKE3 and CRC-32.
+ */ +public class FnHash extends BasicFunction { + + public static final ErrorCodes.ErrorCode FOHA0001 = new ErrorCodes.ErrorCode("FOHA0001", + "Unsupported hash algorithm"); + + public static final FunctionSignature[] FN_HASH = { + new FunctionSignature( + new QName("hash", Function.BUILTIN_FUNCTION_NS), + "Returns the hash of the input value using the default algorithm (MD5).", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_ONE, "The value to hash (string, hexBinary, or base64Binary)") + }, + new FunctionReturnSequenceType(Type.HEX_BINARY, Cardinality.ZERO_OR_ONE, "the hash value")), + new FunctionSignature( + new QName("hash", Function.BUILTIN_FUNCTION_NS), + "Returns the hash of the input value using the specified algorithm.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_ONE, "The value to hash (string, hexBinary, or base64Binary)"), + new FunctionParameterSequenceType("algorithm", Type.STRING, Cardinality.ZERO_OR_ONE, "The hash algorithm (MD5, SHA-1, SHA-256, CRC-32)") + }, + new FunctionReturnSequenceType(Type.HEX_BINARY, Cardinality.ZERO_OR_ONE, "the hash value")), + new FunctionSignature( + new QName("hash", Function.BUILTIN_FUNCTION_NS), + "Returns the hash of the input value using the specified algorithm and options.", + new SequenceType[] { + new FunctionParameterSequenceType("value", Type.ITEM, Cardinality.ZERO_OR_ONE, "The value to hash (string, hexBinary, or base64Binary)"), + new FunctionParameterSequenceType("algorithm", Type.STRING, Cardinality.ZERO_OR_ONE, "The hash algorithm (MD5, SHA-1, SHA-256, CRC-32)"), + new FunctionParameterSequenceType("options", Type.MAP_ITEM, Cardinality.ZERO_OR_ONE, "Options map (reserved for future use)") + }, + new FunctionReturnSequenceType(Type.HEX_BINARY, Cardinality.ZERO_OR_ONE, "the hash value")) + }; + + public FnHash(final XQueryContext context, final FunctionSignature signature) { + super(context, 
signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (args[0].isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Get the input bytes + final byte[] inputBytes = getInputBytes(args[0]); + + // Get the algorithm + String algorithm = "MD5"; + if (args.length > 1 && !args[1].isEmpty()) { + algorithm = args[1].getStringValue().trim().toUpperCase(); + } + + // Compute hash + final byte[] hashBytes; + if ("CRC-32".equals(algorithm) || "CRC32".equals(algorithm)) { + final CRC32 crc32 = new CRC32(); + crc32.update(inputBytes); + final long crcValue = crc32.getValue(); + // Return as 4-byte big-endian hexBinary + hashBytes = ByteBuffer.allocate(4).putInt((int) crcValue).array(); + } else if ("BLAKE3".equals(algorithm)) { + final Blake3Digest blake3 = new Blake3Digest(32); + blake3.update(inputBytes, 0, inputBytes.length); + hashBytes = new byte[32]; + blake3.doFinal(hashBytes, 0); + } else { + // Map algorithm names to Java MessageDigest names + final String javaAlgorithm; + switch (algorithm) { + case "MD5": + javaAlgorithm = "MD5"; + break; + case "SHA-1": + case "SHA1": + javaAlgorithm = "SHA-1"; + break; + case "SHA-256": + case "SHA256": + javaAlgorithm = "SHA-256"; + break; + case "SHA-384": + case "SHA384": + javaAlgorithm = "SHA-384"; + break; + case "SHA-512": + case "SHA512": + javaAlgorithm = "SHA-512"; + break; + default: + throw new XPathException(this, FOHA0001, + "Unsupported hash algorithm: " + algorithm); + } + try { + final MessageDigest digest = MessageDigest.getInstance(javaAlgorithm); + hashBytes = digest.digest(inputBytes); + } catch (final NoSuchAlgorithmException e) { + throw new XPathException(this, FOHA0001, + "Hash algorithm not available: " + javaAlgorithm); + } + } + + // Return as hexBinary — use BinaryValueFromBinaryString to avoid + // stream registration with the XQuery context (prevents deadlock + // in concurrent test execution environments) + final 
StringBuilder hex = new StringBuilder(hashBytes.length * 2); + for (final byte b : hashBytes) { + hex.append(String.format("%02X", b & 0xFF)); + } + return new BinaryValueFromBinaryString(this, new HexBinaryValueType(), hex.toString()); + } + + private byte[] getInputBytes(final Sequence value) throws XPathException { + final int type = value.itemAt(0).getType(); + if (Type.subTypeOf(type, Type.STRING) || Type.subTypeOf(type, Type.ANY_URI) || Type.subTypeOf(type, Type.UNTYPED_ATOMIC)) { + return value.getStringValue().getBytes(StandardCharsets.UTF_8); + } else if (Type.subTypeOf(type, Type.BASE64_BINARY) || Type.subTypeOf(type, Type.HEX_BINARY)) { + final BinaryValue binaryValue = (BinaryValue) value.itemAt(0); + return binaryValue.toJavaObject(byte[].class); + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "fn:hash expects string, hexBinary, or base64Binary, got: " + Type.getTypeName(type)); + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHigherOrderFun40.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHigherOrderFun40.java new file mode 100644 index 00000000000..bbd77a86d8e --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHigherOrderFun40.java @@ -0,0 +1,361 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import org.exist.xquery.functions.array.ArrayType; + +/** + * Implements XQuery 4.0 higher-order functions: + * fn:index-where, fn:take-while, fn:do-until, fn:while-do, fn:sort-with, + * fn:scan-left, fn:scan-right. 
+ */ +public class FnHigherOrderFun40 extends BasicFunction { + + public static final FunctionSignature FN_INDEX_WHERE = new FunctionSignature( + new QName("index-where", Function.BUILTIN_FUNCTION_NS), + "Returns the positions of items that match the supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The predicate function") + }, + new FunctionReturnSequenceType(Type.INTEGER, Cardinality.ZERO_OR_MORE, "positions where the predicate is true")); + + public static final FunctionSignature FN_TAKE_WHILE = new FunctionSignature( + new QName("take-while", Function.BUILTIN_FUNCTION_NS), + "Returns items from the input sequence prior to the first one that fails to match a supplied predicate.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The predicate function") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the leading items matching the predicate")); + + public static final FunctionSignature FN_WHILE_DO = new FunctionSignature( + new QName("while-do", Function.BUILTIN_FUNCTION_NS), + "Processes a supplied value repeatedly, continuing while a condition is true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial input"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The condition to test"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The action to apply") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the first value that fails the predicate")); + + public static final FunctionSignature FN_DO_UNTIL = new 
FunctionSignature( + new QName("do-until", Function.BUILTIN_FUNCTION_NS), + "Processes a supplied value repeatedly, continuing until a condition becomes true.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial input"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The action to apply"), + new FunctionParameterSequenceType("predicate", Type.FUNCTION, Cardinality.EXACTLY_ONE, "The condition to test") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the first value that satisfies the predicate")); + + public static final FunctionSignature FN_SORT_WITH = new FunctionSignature( + new QName("sort-with", Function.BUILTIN_FUNCTION_NS), + "Sorts a sequence according to a supplied comparator function.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The sequence to sort"), + new FunctionParameterSequenceType("comparators", Type.FUNCTION, Cardinality.ONE_OR_MORE, "The comparator function(s)") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the sorted sequence")); + + public static final FunctionSignature FN_SCAN_LEFT = new FunctionSignature( + new QName("scan-left", Function.BUILTIN_FUNCTION_NS), + "Returns successive partial results of fold-left.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("init", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial value"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, + "The accumulation function: fn(accumulator, item) as item()*") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, + "sequence of single-member arrays with successive fold results")); + + public static final FunctionSignature FN_SCAN_RIGHT = new FunctionSignature( + new 
QName("scan-right", Function.BUILTIN_FUNCTION_NS), + "Returns successive partial results of fold-right.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("init", Type.ITEM, Cardinality.ZERO_OR_MORE, "The initial value"), + new FunctionParameterSequenceType("action", Type.FUNCTION, Cardinality.EXACTLY_ONE, + "The accumulation function: fn(item, accumulator) as item()*") + }, + new FunctionReturnSequenceType(Type.ARRAY_ITEM, Cardinality.ZERO_OR_MORE, + "sequence of single-member arrays with successive fold results")); + + private AnalyzeContextInfo cachedContextInfo; + + public FnHigherOrderFun40(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("while-do")) { + return whileDo(args); + } else if (isCalledAs("do-until")) { + return doUntil(args); + } else if (isCalledAs("sort-with")) { + return sortWith(args); + } else if (isCalledAs("scan-left")) { + return scanLeft(args); + } else if (isCalledAs("scan-right")) { + return scanRight(args); + } + + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + try (final FunctionReference ref = (FunctionReference) args[1].itemAt(0)) { + ref.analyze(cachedContextInfo); + final int arity = ref.getSignature().getArgumentCount(); + + if (isCalledAs("index-where")) { + return indexWhere(input, ref, arity); + } else { + return takeWhile(input, ref, arity); + } + } + } + + private Sequence indexWhere(final Sequence input, final FunctionReference ref, final int arity) throws XPathException { + final 
ValueSequence result = new ValueSequence(); + int pos = 1; + for (final SequenceIterator i = input.iterate(); i.hasNext(); pos++) { + final Item item = i.nextItem(); + final Sequence r = callPredicate(ref, item, pos, arity); + if (!r.isEmpty() && r.effectiveBooleanValue()) { + result.add(new IntegerValue(this, pos)); + } + } + return result; + } + + private Sequence takeWhile(final Sequence input, final FunctionReference ref, final int arity) throws XPathException { + final ValueSequence result = new ValueSequence(); + int pos = 1; + for (final SequenceIterator i = input.iterate(); i.hasNext(); pos++) { + final Item item = i.nextItem(); + final Sequence r = callPredicate(ref, item, pos, arity); + if (r.isEmpty() || !r.effectiveBooleanValue()) { + break; + } + result.add(item); + } + return result; + } + + private Sequence callPredicate(final FunctionReference ref, final Item item, final int pos, final int arity) throws XPathException { + if (arity == 1) { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence()}); + } else { + return ref.evalFunction(null, null, new Sequence[]{item.toSequence(), new IntegerValue(this, pos)}); + } + } + + private Sequence callWithSeqAndPos(final FunctionReference ref, final Sequence input, final int pos, final int arity) throws XPathException { + if (arity == 1) { + return ref.evalFunction(null, null, new Sequence[]{input}); + } else { + return ref.evalFunction(null, null, new Sequence[]{input, new IntegerValue(this, pos)}); + } + } + + private Sequence whileDo(final Sequence[] args) throws XPathException { + Sequence input = args[0]; + try (final FunctionReference predicate = (FunctionReference) args[1].itemAt(0); + final FunctionReference action = (FunctionReference) args[2].itemAt(0)) { + predicate.analyze(cachedContextInfo); + action.analyze(cachedContextInfo); + final int predArity = predicate.getSignature().getArgumentCount(); + final int actArity = action.getSignature().getArgumentCount(); + int pos = 1; + while 
(true) { + final Sequence test = callWithSeqAndPos(predicate, input, pos, predArity); + if (test.isEmpty() || !test.effectiveBooleanValue()) { + return input; + } + input = callWithSeqAndPos(action, input, pos, actArity); + pos++; + } + } + } + + private Sequence doUntil(final Sequence[] args) throws XPathException { + Sequence input = args[0]; + try (final FunctionReference action = (FunctionReference) args[1].itemAt(0); + final FunctionReference predicate = (FunctionReference) args[2].itemAt(0)) { + action.analyze(cachedContextInfo); + predicate.analyze(cachedContextInfo); + final int actArity = action.getSignature().getArgumentCount(); + final int predArity = predicate.getSignature().getArgumentCount(); + int pos = 1; + while (true) { + input = callWithSeqAndPos(action, input, pos, actArity); + final Sequence test = callWithSeqAndPos(predicate, input, pos, predArity); + if (!test.isEmpty() && test.effectiveBooleanValue()) { + return input; + } + pos++; + } + } + } + + private Sequence sortWith(final Sequence[] args) throws XPathException { + final Sequence input = args[0]; + if (input.getItemCount() <= 1) { + return input; + } + final Sequence comparators = args[1]; + + // Collect all items into a list + final List items = new ArrayList<>(input.getItemCount()); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + items.add(i.nextItem()); + } + + // Get the first comparator (most test cases use a single one) + final FunctionReference[] comparatorRefs = new FunctionReference[comparators.getItemCount()]; + for (int c = 0; c < comparators.getItemCount(); c++) { + comparatorRefs[c] = (FunctionReference) comparators.itemAt(c); + comparatorRefs[c].analyze(cachedContextInfo); + } + + // Sort using the comparator(s) + try { + items.sort((a, b) -> { + try { + for (final FunctionReference comp : comparatorRefs) { + final Sequence result = comp.evalFunction(null, null, + new Sequence[]{a.toSequence(), b.toSequence()}); + final long cmp = ((IntegerValue) 
result.itemAt(0)).getLong(); + if (cmp != 0) { + return Long.compare(cmp, 0); + } + } + return 0; + } catch (final XPathException e) { + throw new RuntimeException(e); + } + }); + } catch (final RuntimeException e) { + if (e.getCause() instanceof XPathException) { + throw (XPathException) e.getCause(); + } + throw e; + } + + final ValueSequence result = new ValueSequence(items.size()); + for (final Item item : items) { + result.add(item); + } + return result; + } + + private Sequence scanLeft(final Sequence[] args) throws XPathException { + final Sequence input = args[0]; + Sequence accumulator = args[1]; + try (final FunctionReference action = (FunctionReference) args[2].itemAt(0)) { + action.analyze(cachedContextInfo); + + final int count = input.getItemCount(); + final ValueSequence result = new ValueSequence(count + 1); + + // First element: [init] + result.add(new ArrayType(this, context, Collections.singletonList(accumulator))); + + // For each input item, apply action and wrap result + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + accumulator = action.evalFunction(null, null, + new Sequence[]{accumulator, item.toSequence()}); + result.add(new ArrayType(this, context, Collections.singletonList(accumulator))); + } + + return result; + } + } + + private Sequence scanRight(final Sequence[] args) throws XPathException { + final Sequence input = args[0]; + final Sequence init = args[1]; + try (final FunctionReference action = (FunctionReference) args[2].itemAt(0)) { + action.analyze(cachedContextInfo); + + // Collect items into a list for reverse iteration + final List items = new ArrayList<>(input.getItemCount()); + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + items.add(i.nextItem()); + } + + // Build results from right to left + final List results = new ArrayList<>(items.size() + 1); + Sequence accumulator = init; + results.add(accumulator); + + for (int idx = items.size() - 1; idx >= 
0; idx--) { + accumulator = action.evalFunction(null, null, + new Sequence[]{items.get(idx).toSequence(), accumulator}); + results.add(accumulator); + } + + // Reverse so first result is fold-right of entire sequence + Collections.reverse(results); + + final ValueSequence result = new ValueSequence(results.size()); + for (final Sequence s : results) { + result.add(new ArrayType(this, context, Collections.singletonList(s))); + } + return result; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHighestLowest.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHighestLowest.java new file mode 100644 index 00000000000..a2abad4fd94 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHighestLowest.java @@ -0,0 +1,226 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import java.util.ArrayList; +import java.util.List; + +import com.ibm.icu.text.Collator; +import org.exist.dom.QName; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.DoubleValue; +import org.exist.xquery.value.FloatValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.NumericValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:highest and fn:lowest (XQuery 4.0). + * + * Returns items from the input having the highest/lowest key values. 
+ */ +public class FnHighestLowest extends BasicFunction { + + public static final FunctionSignature[] FN_HIGHEST = { + new FunctionSignature( + new QName("highest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the highest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with highest key")), + new FunctionSignature( + new QName("highest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the highest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with highest key")), + new FunctionSignature( + new QName("highest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the highest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI"), + new FunctionParameterSequenceType("key", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "Key function") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with highest key")) + }; + + public static final FunctionSignature[] FN_LOWEST = { + new FunctionSignature( + new QName("lowest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the lowest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with lowest key")), + new FunctionSignature( + new QName("lowest", Function.BUILTIN_FUNCTION_NS), + 
"Returns items with the lowest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with lowest key")), + new FunctionSignature( + new QName("lowest", Function.BUILTIN_FUNCTION_NS), + "Returns items with the lowest key value.", + new SequenceType[] { + new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"), + new FunctionParameterSequenceType("collation", Type.STRING, Cardinality.ZERO_OR_ONE, "The collation URI"), + new FunctionParameterSequenceType("key", Type.FUNCTION, Cardinality.ZERO_OR_ONE, "Key function") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items with lowest key")) + }; + + private AnalyzeContextInfo cachedContextInfo; + + public FnHighestLowest(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + super.analyze(cachedContextInfo); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final Sequence input = args[0]; + if (input.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Resolve collation + final Collator collator; + if (args.length >= 2 && !args[1].isEmpty()) { + collator = context.getCollator(args[1].getStringValue()); + } else { + collator = context.getDefaultCollator(); + } + + // Resolve key function (default is data#1) + FunctionReference keyRef = null; + if (args.length >= 3 && !args[2].isEmpty()) { + keyRef = (FunctionReference) args[2].itemAt(0); + keyRef.analyze(cachedContextInfo); + } + + final boolean 
findHighest = isCalledAs("highest"); + + // Compute keys for all items + final List items = new ArrayList<>(input.getItemCount()); + final List keys = new ArrayList<>(input.getItemCount()); + + for (final SequenceIterator i = input.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + items.add(item); + + // Compute key: apply key function or default atomization (fn:data) + final AtomicValue keyVal; + if (keyRef != null) { + final Sequence keyResult = keyRef.evalFunction(null, null, new Sequence[]{item.toSequence()}); + if (keyResult.isEmpty()) { + keyVal = null; + } else { + AtomicValue kv = keyResult.itemAt(0).atomize(); + if (kv.getType() == Type.UNTYPED_ATOMIC) { + kv = kv.convertTo(Type.DOUBLE); + } + keyVal = kv; + } + } else { + // Default key is fn:data() — atomize the item directly + final AtomicValue atomized = item.atomize(); + if (atomized.getType() == Type.UNTYPED_ATOMIC) { + keyVal = atomized.convertTo(Type.DOUBLE); + } else { + keyVal = atomized; + } + } + keys.add(keyVal); + } + + // Find the extreme value + AtomicValue extremeKey = null; + for (final AtomicValue key : keys) { + if (key == null || isNaN(key)) { + continue; + } + if (extremeKey == null) { + extremeKey = key; + } else { + final int cmp = key.compareTo(collator, extremeKey); + if (findHighest ? 
cmp > 0 : cmp < 0) { + extremeKey = key; + } + } + } + + if (extremeKey == null) { + return Sequence.EMPTY_SEQUENCE; + } + + // Collect all items with the extreme key value + final ValueSequence result = new ValueSequence(); + for (int i = 0; i < items.size(); i++) { + final AtomicValue key = keys.get(i); + if (key != null && !isNaN(key) && key.compareTo(collator, extremeKey) == 0) { + result.add(items.get(i)); + } + } + + if (keyRef != null) { + keyRef.close(); + } + + return result; + } + + private static boolean isNaN(final AtomicValue v) { + if (v instanceof DoubleValue) { + return Double.isNaN(((DoubleValue) v).getDouble()); + } + if (v instanceof FloatValue) { + return Float.isNaN(((FloatValue) v).getValue()); + } + return false; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHtmlDoc.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHtmlDoc.java new file mode 100644 index 00000000000..bade6cf1717 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnHtmlDoc.java @@ -0,0 +1,71 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.functions.fn;
+
+import org.exist.dom.QName;
+import org.exist.xquery.*;
+import org.exist.xquery.value.*;
+
+/**
+ * fn:html-doc($uri) — the HTML counterpart of fn:doc.
+ *
+ * The resource is first read as text by delegating to {@link FunUnparsedText};
+ * that text is then handed to {@link FnParseHtml}, whose result is returned as is.
+ */
+public class FnHtmlDoc extends BasicFunction {
+
+    public static final FunctionSignature FN_HTML_DOC = new FunctionSignature(
+            new QName("html-doc", Function.BUILTIN_FUNCTION_NS),
+            "Loads an HTML resource from a URI and returns the parsed XHTML document.",
+            new SequenceType[] {
+                    new FunctionParameterSequenceType("uri", Type.STRING,
+                            Cardinality.ZERO_OR_ONE, "The URI of the HTML resource")
+            },
+            new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE,
+                    "The parsed XHTML document"));
+
+    public FnHtmlDoc(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Evaluates fn:html-doc.
+     *
+     * @param args            args[0] is the (possibly empty) URI argument
+     * @param contextSequence passed through to the delegated functions
+     * @return the empty sequence when the URI argument or the loaded text is empty,
+     *         otherwise whatever {@link FnParseHtml} produces for the loaded text
+     * @throws XPathException if one of the delegated functions fails
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Sequence uriArg = args[0];
+        if (uriArg.isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final Sequence html = readAsText(uriArg.getStringValue(), contextSequence);
+        if (html.isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        // The HTML parser is only instantiated once there is text to give it.
+        return new FnParseHtml(context, FnParseHtml.FN_PARSE_HTML[0])
+                .eval(new Sequence[]{html}, contextSequence);
+    }
+
+    /** Reads the resource at {@code uri} as text via the one-argument unparsed-text signature. */
+    private Sequence readAsText(final String uri, final Sequence contextSequence) throws XPathException {
+        final FunUnparsedText reader = new FunUnparsedText(context, FunUnparsedText.FS_UNPARSED_TEXT[0]);
+        return reader.eval(new Sequence[]{new StringValue(this, uri)}, contextSequence);
+    }
+}
diff --git
a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIdentityVoid.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIdentityVoid.java new file mode 100644 index 00000000000..777b717788d --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIdentityVoid.java @@ -0,0 +1,78 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:identity and fn:void (XQuery 4.0). 
+ */
+public class FnIdentityVoid extends BasicFunction {
+
+    public static final FunctionSignature FN_IDENTITY = new FunctionSignature(
+            new QName("identity", Function.BUILTIN_FUNCTION_NS),
+            "Returns its argument value unchanged.",
+            new SequenceType[] {
+                    new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input value")
+            },
+            new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the input value unchanged"));
+
+    public static final FunctionSignature[] FN_VOID = {
+            new FunctionSignature(
+                    new QName("void", Function.BUILTIN_FUNCTION_NS),
+                    "Absorbs the argument and returns the empty sequence.",
+                    new SequenceType[] {
+                            new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input to discard")
+                    },
+                    new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "the empty sequence")),
+            new FunctionSignature(
+                    new QName("void", Function.BUILTIN_FUNCTION_NS),
+                    "Returns the empty sequence.",
+                    new SequenceType[] {},
+                    new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "the empty sequence"))
+    };
+
+    public FnIdentityVoid(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Dispatches on the name the function was called under.
+     *
+     * @return {@code args[0]} untouched when called as "identity"; the empty
+     *         sequence for every other name (i.e. both arities of "void")
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        // args is only dereferenced on the identity path, so the zero-arity void is safe here.
+        return isCalledAs("identity") ? args[0] : Sequence.EMPTY_SEQUENCE;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInScopeNamespaces.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInScopeNamespaces.java
new file mode 100644
index 00000000000..b75d1d508bf
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInScopeNamespaces.java
@@ -0,0 +1,152 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+
* http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.Namespaces; +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.*; +import org.w3c.dom.Element; +import org.w3c.dom.Node; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.HashSet; + +/** + * Implements XQuery 4.0 fn:in-scope-namespaces. + * + * Returns a map(xs:string, xs:string) where keys are namespace prefixes + * (empty string for the default namespace) and values are namespace URIs. + * + * Uses nearest-ancestor-wins semantics: for each prefix, the declaration on + * the nearest ancestor (or the element itself) takes precedence. 
+ */
+public class FnInScopeNamespaces extends BasicFunction {
+
+    public static final FunctionSignature FN_IN_SCOPE_NAMESPACES = new FunctionSignature(
+            new QName("in-scope-namespaces", Function.BUILTIN_FUNCTION_NS),
+            "Returns a map from namespace prefixes to namespace URIs for all in-scope namespaces of the given element.",
+            new SequenceType[]{
+                    new FunctionParameterSequenceType("element", Type.ELEMENT, Cardinality.EXACTLY_ONE, "The element node")
+            },
+            new FunctionReturnSequenceType(Type.MAP_ITEM, Cardinality.EXACTLY_ONE, "A map of prefix to URI"));
+
+    public FnInScopeNamespaces(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Builds the prefix-to-URI map for the element given as the only argument.
+     *
+     * The map is seeded with the "xml" prefix and the prefixes reported by
+     * {@code context.getInScopePrefixes()}. Declarations found on the element
+     * (and, when {@code context.inheritNamespaces()} is true, on its ancestors)
+     * are then laid on top. The element walk is skipped entirely when
+     * {@code context.preserveNamespaces()} is false.
+     *
+     * @param args            args[0] holds exactly one element node
+     * @param contextSequence unused
+     * @return a map item with one string entry per in-scope prefix
+     * @throws XPathException if building the result map fails
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final NodeValue nodeValue = (NodeValue) args[0].itemAt(0);
+
+        // Lowest priority first: the fixed xml binding, then the static context.
+        final Map<String, String> nsMap = new LinkedHashMap<>();
+        nsMap.put("xml", Namespaces.XML_NS);
+
+        final Map<String, String> inScopePrefixes = context.getInScopePrefixes();
+        if (inScopePrefixes != null) {
+            nsMap.putAll(inScopePrefixes);
+        }
+
+        // Walk from the element towards the root. "seen" records every prefix a
+        // nearer element has already settled, so farther declarations cannot replace it.
+        final Set<String> seen = new HashSet<>();
+        final Map<String, String> elementNs = new LinkedHashMap<>();
+
+        // Resolve the start node once: the identity check below must compare against
+        // the very object the walk starts from, not a freshly resolved one.
+        final Node start = nodeValue.getNode();
+
+        if (context.preserveNamespaces()) {
+            final boolean inherit = context.inheritNamespaces();
+            for (Node node = start;
+                 node != null && node.getNodeType() == Node.ELEMENT_NODE;
+                 node = node.getParentNode()) {
+                if (inherit || node == start) {
+                    collectElementNamespaces((Element) node, elementNs, seen);
+                }
+            }
+        }
+
+        // Element declarations override the static context.
+        nsMap.putAll(elementNs);
+
+        // Drop the "no prefix, no URI" entry: it carries no binding.
+        nsMap.entrySet().removeIf(entry ->
+                (entry.getKey() == null || entry.getKey().isEmpty()) &&
+                (entry.getValue() == null || entry.getValue().isEmpty()));
+
+        MapType result = new MapType(this, context);
+        for (final Map.Entry<String, String> entry : nsMap.entrySet()) {
+            result = (MapType) result.put(
+                    new StringValue(this, entry.getKey()),
+                    new StringValue(this, entry.getValue()));
+        }
+
+        return result;
+    }
+
+    /**
+     * Collects the namespace bindings contributed by a single element.
+     *
+     * A prefix is only written to {@code nsMap} the first time it is encountered;
+     * {@code seen} carries that knowledge across calls so that the nearest
+     * declaration wins when the caller walks from an element up to the root.
+     *
+     * @param element the element to inspect
+     * @param nsMap   receives prefix-to-URI bindings (the default namespace uses the empty string as key)
+     * @param seen    prefixes already settled by a nearer element; updated in place
+     */
+    private static void collectElementNamespaces(final Element element, final Map<String, String> nsMap,
+                                                 final Set<String> seen) {
+        final String namespaceURI = element.getNamespaceURI();
+        final String ownPrefix = element.getPrefix();
+        final String ownKey = ownPrefix == null ? "" : ownPrefix;
+
+        // The namespace of the element's own name.
+        if (namespaceURI != null && !namespaceURI.isEmpty() && seen.add(ownKey)) {
+            nsMap.put(ownKey, namespaceURI);
+        }
+
+        // Explicit declarations; how they are exposed depends on the DOM implementation.
+        if (element instanceof org.exist.dom.memtree.ElementImpl memElement) {
+            final Map<String, String> declared = new LinkedHashMap<>();
+            memElement.getNamespaceMap(declared);
+            for (final Map.Entry<String, String> entry : declared.entrySet()) {
+                if (seen.add(entry.getKey())) {
+                    nsMap.put(entry.getKey(), entry.getValue());
+                }
+            }
+        } else if (element instanceof org.exist.dom.persistent.ElementImpl storedElement) {
+            if (storedElement.declaresNamespacePrefixes()) {
+                for (final java.util.Iterator<String> i = storedElement.getPrefixes(); i.hasNext(); ) {
+                    final String prefix = i.next();
+                    if (seen.add(prefix)) {
+                        nsMap.put(prefix, storedElement.getNamespaceForPrefix(prefix));
+                    }
+                }
+            }
+        }
+
+        // Undeclaration: an explicitly empty namespace URI removes the binding here, and the
+        // prefix is marked as seen so that a farther ancestor cannot bring it back.
+        if (namespaceURI != null && namespaceURI.isEmpty()) {
+            nsMap.remove(ownKey);
+            seen.add(ownKey);
+        }
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInsertSeparator.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInsertSeparator.java
new file mode 100644
index 00000000000..ffe3729a0d3
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInsertSeparator.java
@@ -0,0 +1,74 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:insert-separator (XQuery 4.0). + * + * Inserts a separator between adjacent items in a sequence. 
+ */
+public class FnInsertSeparator extends BasicFunction {
+
+    public static final FunctionSignature FN_INSERT_SEPARATOR = new FunctionSignature(
+            new QName("insert-separator", Function.BUILTIN_FUNCTION_NS),
+            "Inserts a separator between adjacent items in a sequence.",
+            new SequenceType[] {
+                    new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"),
+                    new FunctionParameterSequenceType("separator", Type.ITEM, Cardinality.ZERO_OR_MORE, "The separator to insert")
+            },
+            new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "the sequence with separators inserted"));
+
+    public FnInsertSeparator(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Interleaves the separator sequence between consecutive input items.
+     *
+     * @param args            args[0] is the input, args[1] the separator (both may hold any number of items)
+     * @param contextSequence unused
+     * @return the input itself when it has at most one item or the separator is empty;
+     *         otherwise a new sequence with the whole separator between each adjacent pair
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Sequence items = args[0];
+        final Sequence sep = args[1];
+        final int count = items.getItemCount();
+
+        final boolean nothingToInterleave = count <= 1 || sep.isEmpty();
+        if (nothingToInterleave) {
+            return items;
+        }
+
+        // count items plus (count - 1) copies of the separator
+        final int capacity = count + (count - 1) * sep.getItemCount();
+        final ValueSequence interleaved = new ValueSequence(capacity);
+        for (int idx = 0; idx < count; idx++) {
+            if (idx > 0) {
+                interleaved.addAll(sep);
+            }
+            interleaved.add(items.itemAt(idx));
+        }
+        return interleaved;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInvisibleXml.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInvisibleXml.java
new file mode 100644
index 00000000000..3599bf77dac
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnInvisibleXml.java
@@ -0,0 +1,308 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import de.bottlecaps.markup.Blitz; +import de.bottlecaps.markup.BlitzException; +import de.bottlecaps.markup.BlitzParseException; + +import org.exist.Namespaces; +import org.exist.dom.QName; +import org.exist.dom.memtree.DocumentImpl; +import org.exist.dom.memtree.SAXAdapter; +import org.exist.util.XMLReaderPool; +import org.exist.xquery.value.NodeValue; +import org.exist.xquery.AbstractExpression; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionCall; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.UserDefinedFunction; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.xml.sax.InputSource; +import 
org.xml.sax.XMLReader;
+
+import javax.xml.XMLConstants;
+import java.io.StringReader;
+
+/**
+ * Implements fn:invisible-xml() (XQuery 4.0).
+ *
+ * Compiles an Invisible XML grammar and returns a function that parses input
+ * strings into XML documents.
+ *
+ * Uses the Markup Blitz library for ixml grammar compilation and parsing.
+ * Integration pattern informed by BaseX's implementation.
+ *
+ * Error mapping used throughout this class: grammar problems are reported as
+ * FOIX0001, input parse problems as FOIX0002, and wrongly typed arguments or
+ * options as XPTY0004.
+ */
+public class FnInvisibleXml extends BasicFunction {
+
+    // Blitz.generateFromXml() is not thread-safe — synchronize XML grammar compilation
+    private static final Object BLITZ_XML_LOCK = new Object();
+
+    private static final FunctionParameterSequenceType PARAM_GRAMMAR =
+            new FunctionParameterSequenceType("grammar", Type.ITEM,
+                    Cardinality.ZERO_OR_ONE, "The ixml grammar (string or element node)");
+    private static final FunctionParameterSequenceType PARAM_OPTIONS =
+            new FunctionParameterSequenceType("options", Type.MAP_ITEM,
+                    Cardinality.ZERO_OR_ONE, "Options map (fail-on-error: xs:boolean)");
+    private static final FunctionReturnSequenceType RETURN_TYPE =
+            new FunctionReturnSequenceType(Type.FUNCTION, Cardinality.EXACTLY_ONE,
+                    "a function that parses strings according to the grammar");
+
+    // Two arities: (grammar) and (grammar, options).
+    public static final FunctionSignature[] SIGNATURES = {
+        new FunctionSignature(
+            new QName("invisible-xml", Function.BUILTIN_FUNCTION_NS),
+            "Compiles an Invisible XML grammar and returns a parsing function.",
+            new SequenceType[] { PARAM_GRAMMAR },
+            RETURN_TYPE),
+        new FunctionSignature(
+            new QName("invisible-xml", Function.BUILTIN_FUNCTION_NS),
+            "Compiles an Invisible XML grammar and returns a parsing function.",
+            new SequenceType[] { PARAM_GRAMMAR, PARAM_OPTIONS },
+            RETURN_TYPE)
+    };
+
+    // Copy of the context info taken in analyze(); within this class it is only
+    // written there and passed on to super.analyze().
+    private AnalyzeContextInfo cachedContextInfo;
+
+    public FnInvisibleXml(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Analyzes this call against a private copy of {@code contextInfo} rather
+     * than the caller's instance.
+     */
+    @Override
+    public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException {
+        cachedContextInfo = new AnalyzeContextInfo(contextInfo);
+        super.analyze(cachedContextInfo);
+    }
+
+    /**
+     * Compiles the grammar and wraps the resulting parser in a one-argument function item.
+     *
+     * @param args            args[0] is the grammar (empty sequence, string, untyped atomic or
+     *                        element node); args[1], when present and non-empty, is the options map
+     * @param contextSequence unused
+     * @return a {@link FunctionReference} to a function taking the string to parse
+     * @throws XPathException XPTY0004 if 'fail-on-error' is not an xs:boolean or the grammar is an
+     *                        atomic value other than a string; FOIX0001 if the grammar is a
+     *                        non-element node, cannot be serialized, or is rejected by Markup Blitz
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Sequence grammarArg = args[0];
+
+        // Parse options — default fail-on-error is false per spec
+        boolean failOnError = false;
+        if (args.length > 1 && !args[1].isEmpty()) {
+            final AbstractMapType options = (AbstractMapType) args[1].itemAt(0);
+            final Sequence failOpt = options.get(new StringValue(this, "fail-on-error"));
+            if (!failOpt.isEmpty()) {
+                final Item failItem = failOpt.itemAt(0);
+                if (failItem.getType() != Type.BOOLEAN) {
+                    throw new XPathException(this, ErrorCodes.XPTY0004,
+                            "Option 'fail-on-error' must be xs:boolean, got: " +
+                            Type.getTypeName(failItem.getType()));
+                }
+                failOnError = ((BooleanValue) failItem).getValue();
+            } else if (options.contains(new StringValue(this, "fail-on-error"))) {
+                // Key exists but value is empty sequence
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Option 'fail-on-error' must be xs:boolean, got empty sequence");
+            }
+            // else: key not present, use default (false)
+        }
+
+        // Compile the grammar. The parser is generated with Blitz.Option.FAIL_ON_ERROR
+        // exactly when failOnError is true, in every branch below.
+        final de.bottlecaps.markup.blitz.Parser parser;
+        try {
+            if (grammarArg.isEmpty()) {
+                // Empty sequence = use default ixml grammar
+                parser = failOnError
+                        ? Blitz.generate(Blitz.ixmlGrammar(), Blitz.Option.FAIL_ON_ERROR)
+                        : Blitz.generate(Blitz.ixmlGrammar());
+            } else {
+                final Item grammarItem = grammarArg.itemAt(0);
+                final int grammarType = grammarItem.getType();
+
+                if (Type.subTypeOf(grammarType, Type.ELEMENT)) {
+                    // Element node — serialize to XML string and use generateFromXml
+                    // Synchronized: Blitz.generateFromXml() is not thread-safe
+                    // (serialization itself happens before the lock is taken)
+                    final String xmlGrammar = serializeItem(grammarItem);
+                    synchronized (BLITZ_XML_LOCK) {
+                        parser = failOnError
+                                ? Blitz.generateFromXml(xmlGrammar, Blitz.Option.FAIL_ON_ERROR)
+                                : Blitz.generateFromXml(xmlGrammar);
+                    }
+                } else if (Type.subTypeOf(grammarType, Type.STRING) ||
+                        grammarType == Type.UNTYPED_ATOMIC) {
+                    // String grammar
+                    final String grammarStr = grammarItem.getStringValue();
+                    parser = failOnError
+                            ? Blitz.generate(grammarStr, Blitz.Option.FAIL_ON_ERROR)
+                            : Blitz.generate(grammarStr);
+                } else if (Type.subTypeOf(grammarType, Type.NODE)) {
+                    // Other node types (document, etc.) — not valid
+                    throw new XPathException(this, ErrorCodes.FOIX0001,
+                            "Grammar must be an element node or string, got: " +
+                            Type.getTypeName(grammarType));
+                } else {
+                    throw new XPathException(this, ErrorCodes.XPTY0004,
+                            "Grammar must be a string or element node, got: " +
+                            Type.getTypeName(grammarType));
+                }
+            }
+        } catch (final BlitzParseException ex) {
+            // Positioned grammar error: report line, column and offending token.
+            throw new XPathException(this, ErrorCodes.FOIX0001,
+                    "Invalid ixml grammar at line " + ex.getLine() + ", column " + ex.getColumn() +
+                    ": " + ex.getOffendingToken());
+        } catch (final BlitzException ex) {
+            throw new XPathException(this, ErrorCodes.FOIX0001,
+                    "Invalid ixml grammar: " + ex.getMessage());
+        }
+
+        // Create a function item that parses input strings using the compiled grammar
+        final QName inputParam = new QName("input", XMLConstants.NULL_NS_URI);
+
+        final FunctionSignature parseSig = new FunctionSignature(
+                new QName("invisible-xml-parser", Function.BUILTIN_FUNCTION_NS),
+                new SequenceType[] {
+                        new FunctionParameterSequenceType("input", Type.STRING,
+                                Cardinality.EXACTLY_ONE, "The string to parse")
+                },
+                new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.EXACTLY_ONE,
+                        "the parsed XML document"));
+
+        // The function body is a ParseExpression that looks up the "input" variable
+        // registered here and feeds it to the compiled parser.
+        final UserDefinedFunction func = new UserDefinedFunction(context, parseSig);
+        func.addVariable(inputParam);
+        func.setFunctionBody(new ParseExpression(context, parser, inputParam, failOnError));
+
+        final FunctionCall call = new FunctionCall(context, func);
+        call.setLocation(getLine(), getColumn());
+
+        return new FunctionReference(this, call);
+    }
+
+    /**
+     * Serializes a grammar node to a string without XML declaration and without indentation.
+     *
+     * The serializer is borrowed from the broker and always returned, even on failure.
+     *
+     * @param item the grammar item; cast to {@link NodeValue} without a check, so callers
+     *             must only pass nodes
+     * @throws XPathException FOIX0001 wrapping the message of any exception raised while serializing
+     */
+    private String serializeItem(final Item item) throws XPathException {
+        try {
+            final org.exist.storage.serializers.Serializer serializer =
+                    context.getBroker().borrowSerializer();
+            try {
+                serializer.setProperty(javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes");
+                serializer.setProperty(javax.xml.transform.OutputKeys.INDENT, "no");
+                return serializer.serialize((NodeValue) item);
+            } finally {
+                context.getBroker().returnSerializer(serializer);
+            }
+        } catch (final Exception ex) {
+            throw new XPathException(this, ErrorCodes.FOIX0001,
+                    "Failed to serialize grammar node: " + ex.getMessage());
+        }
+    }
+
+    /**
+     * Expression that parses an input string using a compiled ixml parser.
+     *
+     * Used as the body of the function item returned by {@link FnInvisibleXml#eval}.
+     */
+    private static class ParseExpression extends AbstractExpression {
+
+        private final de.bottlecaps.markup.blitz.Parser parser;
+        private final QName inputVar;
+        private final boolean failOnError;
+
+        ParseExpression(final XQueryContext context, final de.bottlecaps.markup.blitz.Parser parser,
+                        final QName inputVar, final boolean failOnError) {
+            super(context);
+            this.parser = parser;
+            this.inputVar = inputVar;
+            this.failOnError = failOnError;
+        }
+
+        /**
+         * Parses the string bound to the input variable and returns the result as a document.
+         *
+         * @return the in-memory document built from the XML text the ixml parser produced
+         * @throws XPathException FOIX0002 if the ixml parser throws, if fail-on-error is set and
+         *                        the output is flagged as failed, or if the output is not parseable XML
+         */
+        @Override
+        public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException {
+            final String input = context.resolveVariable(inputVar).getValue().getStringValue();
+
+            // Parse the input using the compiled ixml parser
+            final String xmlResult;
+            try {
+                xmlResult = parser.parse(input);
+            } catch (final BlitzParseException ex) {
+                if (failOnError) {
+                    throw new XPathException(this, ErrorCodes.FOIX0002,
+                            "ixml parse error at line " + ex.getLine() + ", column " + ex.getColumn() +
+                            ": " + ex.getOffendingToken());
+                }
+                // Should not happen when FAIL_ON_ERROR is not set, but handle gracefully
+                throw new XPathException(this, ErrorCodes.FOIX0002,
+                        "ixml parse error: " + ex.getMessage());
+            } catch (final BlitzException ex) {
+                throw new XPathException(this, ErrorCodes.FOIX0002,
+                        "ixml parse error: " + ex.getMessage());
+            }
+
+            // Check for ixml:state="failed" when fail-on-error is true.
+            // NOTE(review): this is a plain substring search over the whole serialized
+            // result, not a check of the root element's attribute — presumably it would
+            // also match the same text inside content; confirm whether that matters.
+            if (failOnError && xmlResult.contains("ixml:state=\"failed\"")) {
+                throw new XPathException(this, ErrorCodes.FOIX0002,
+                        "ixml parse failed: input is ambiguous or does not match the grammar");
+            }
+
+            // Parse the XML string into an in-memory document
+            return parseXmlString(xmlResult);
+        }
+
+        /**
+         * Turns XML text into an in-memory document using a pooled XML reader.
+         *
+         * The reader is borrowed from the broker pool and returned in {@code finally}.
+         *
+         * @throws XPathException FOIX0002 wrapping the message of any exception raised while parsing
+         */
+        private DocumentImpl parseXmlString(final String xml) throws XPathException {
+            final XMLReaderPool parserPool = context.getBroker().getBrokerPool().getXmlReaderPool();
+            XMLReader xr = null;
+            try {
+                xr = parserPool.borrowXMLReader();
+                final InputSource src = new InputSource(new StringReader(xml));
+                // The same adapter serves as content handler and lexical handler.
+                final SAXAdapter adapter = new SAXAdapter(this, context);
+                xr.setContentHandler(adapter);
+                xr.setProperty(Namespaces.SAX_LEXICAL_HANDLER, adapter);
+                xr.parse(src);
+                return adapter.getDocument();
+            } catch (final Exception ex) {
+                throw new XPathException(this, ErrorCodes.FOIX0002,
+                        "Failed to parse ixml output as XML: " + ex.getMessage());
+            } finally {
+                if (xr != null) {
+                    parserPool.returnXMLReader(xr);
+                }
+            }
+        }
+
+        @Override
+        public int returnsType() {
+            return Type.DOCUMENT;
+        }
+
+        @Override
+        public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException {
+            // nothing to analyze
+        }
+
+        @Override
+        public void dump(final ExpressionDumper dumper) {
+            dumper.display("invisible-xml-parser(...)");
+        }
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIsNaN.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIsNaN.java
new file mode 100644
index 00000000000..5e3e8b1754b
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnIsNaN.java
@@ -0,0 +1,71 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This
library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.DoubleValue; +import org.exist.xquery.value.FloatValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements fn:is-NaN (XQuery 4.0). + * + * Returns true if the argument is the xs:float or xs:double value NaN. 
+ */
+public class FnIsNaN extends BasicFunction {
+
+    public static final FunctionSignature FN_IS_NAN = new FunctionSignature(
+            new QName("is-NaN", Function.BUILTIN_FUNCTION_NS),
+            "Returns true if the argument is the xs:float or xs:double value NaN.",
+            new SequenceType[] {
+                    new FunctionParameterSequenceType("value", Type.ANY_ATOMIC_TYPE, Cardinality.EXACTLY_ONE, "The value to test")
+            },
+            new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE, "true if the value is NaN"));
+
+    public FnIsNaN(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Tests the single argument item for NaN.
+     *
+     * @param args            args[0] holds exactly one atomic value
+     * @param contextSequence unused
+     * @return the boolean NaN test for items typed xs:float or xs:double;
+     *         false for every other type
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Item value = args[0].itemAt(0);
+        final int valueType = value.getType();
+
+        if (valueType == Type.FLOAT) {
+            final float f = ((FloatValue) value).getValue();
+            return BooleanValue.valueOf(Float.isNaN(f));
+        }
+        if (valueType == Type.DOUBLE) {
+            final double d = ((DoubleValue) value).getValue();
+            return BooleanValue.valueOf(Double.isNaN(d));
+        }
+
+        // Only the two floating-point types have a NaN value.
+        return BooleanValue.FALSE;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnItemsAt.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnItemsAt.java
new file mode 100644
index 00000000000..55c9bf64d74
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnItemsAt.java
@@ -0,0 +1,79 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:items-at (XQuery 4.0). + * + * Returns items from the input at the positions specified by the second argument. 
+ */
+public class FnItemsAt extends BasicFunction {
+
+    public static final FunctionSignature FN_ITEMS_AT = new FunctionSignature(
+            new QName("items-at", Function.BUILTIN_FUNCTION_NS),
+            "Returns the items at the specified positions in the input sequence.",
+            new SequenceType[] {
+                    new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The input sequence"),
+                    new FunctionParameterSequenceType("at", Type.INTEGER, Cardinality.ZERO_OR_MORE, "The positions to select")
+            },
+            new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "items at the specified positions"));
+
+    public FnItemsAt(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Selects input items by 1-based position, in the order the positions are given.
+     *
+     * @param args            args[0] is the input sequence, args[1] the sequence of positions
+     * @param contextSequence unused
+     * @return the empty sequence if either argument is empty; otherwise one item per
+     *         position that lies within {@code 1..count(input)} (a position may repeat,
+     *         positions outside that range contribute nothing)
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Sequence input = args[0];
+        final Sequence at = args[1];
+        if (input.isEmpty() || at.isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+        final int inputSize = input.getItemCount();
+        final ValueSequence result = new ValueSequence();
+        for (final SequenceIterator i = at.iterate(); i.hasNext(); ) {
+            final Item posItem = i.nextItem();
+            // Range-check as long: narrowing to int first would wrap a large position
+            // (e.g. 4294967297 becomes 1) and select an item that was never asked for.
+            final long pos = ((IntegerValue) posItem).getLong();
+            if (pos >= 1 && pos <= inputSize) {
+                // Safe narrowing: pos is now known to be at most inputSize, an int.
+                result.add(input.itemAt((int) (pos - 1)));
+            }
+        }
+        return result;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnMessage.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnMessage.java
new file mode 100644
index 00000000000..db50f1319b4
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnMessage.java
@@ -0,0 +1,85 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software
Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; + +/** + * Implements XQuery 4.0 fn:message. + * + * Similar to fn:trace but returns empty-sequence() instead of passing through values. + * Outputs the input values (and optional label) to the log. 
+ */
+public class FnMessage extends BasicFunction {
+
+    private static final Logger LOG = LogManager.getLogger(FnMessage.class);
+
+    /** Signatures of {@code fn:message}: index 0 takes the input only, index 1 adds a label. */
+    public static final FunctionSignature[] FN_MESSAGE = {
+        new FunctionSignature(
+            new QName("message", Function.BUILTIN_FUNCTION_NS),
+            "Outputs values to the log and returns empty sequence.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The values to output")
+            },
+            new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "empty sequence")),
+        new FunctionSignature(
+            new QName("message", Function.BUILTIN_FUNCTION_NS),
+            "Outputs values to the log with a label and returns empty sequence.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("input", Type.ITEM, Cardinality.ZERO_OR_MORE, "The values to output"),
+                new FunctionParameterSequenceType("label", Type.STRING, Cardinality.ZERO_OR_ONE, "Optional label for the output")
+            },
+            new FunctionReturnSequenceType(Type.EMPTY_SEQUENCE, Cardinality.EMPTY_SEQUENCE, "empty sequence"))
+    };
+
+    public FnMessage(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Writes the string values of all input items to the log at INFO level, prefixed with the
+     * label when a non-empty one is supplied, and returns the empty sequence.
+     *
+     * @param args {@code args[0]} is the input sequence; {@code args[1]}, when present, the label
+     * @param contextSequence not used by this function
+     * @return always {@link Sequence#EMPTY_SEQUENCE}
+     * @throws XPathException if the string value of an item or of the label cannot be obtained
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Sequence input = args[0];
+        final String label = (args.length > 1 && !args[1].isEmpty()) ? args[1].getStringValue() : null;
+
+        final String value = joinStringValues(input);
+        if (label != null && !label.isEmpty()) {
+            LOG.info("{}: {}", label, value);
+        } else {
+            LOG.info("{}", value);
+        }
+
+        return Sequence.EMPTY_SEQUENCE;
+    }
+
+    /**
+     * Joins the string values of every item with single spaces.
+     *
+     * The items are rendered one by one instead of calling {@code getStringValue()} on the
+     * sequence itself, which is not guaranteed to cover more than the first item; an empty
+     * sequence yields the empty string.
+     */
+    private static String joinStringValues(final Sequence sequence) throws XPathException {
+        final StringBuilder joined = new StringBuilder();
+        final int count = sequence.getItemCount();
+        for (int i = 0; i < count; i++) {
+            if (i > 0) {
+                joined.append(' ');
+            }
+            joined.append(sequence.itemAt(i).getStringValue());
+        }
+        return joined.toString();
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java index 413c58b5f3d..9aeade4aab4 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java @@ -67,13 +67,15 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunData.signatures[1], FunData.class), new FunctionDef(FunDateTime.signature, FunDateTime.class), new FunctionDef(FunDeepEqual.signatures[0], FunDeepEqual.class), - new FunctionDef(FunDeepEqual.signatures[1], FunDeepEqual.class), + new FunctionDef(FnDeepEqualOptions.FN_DEEP_EQUAL_OPTIONS, FnDeepEqualOptions.class), new FunctionDef(FunDefaultCollation.signature, FunDefaultCollation.class), new FunctionDef(FnDefaultLanguage.FS_DEFAULT_LANGUAGE, FnDefaultLanguage.class), new FunctionDef(FunDistinctValues.signatures[0], FunDistinctValues.class), new FunctionDef(FunDistinctValues.signatures[1], FunDistinctValues.class), new FunctionDef(FunDoc.signature, FunDoc.class), + new FunctionDef(FunDoc.signatureWithOptions, FunDoc.class), new FunctionDef(FunDocAvailable.signature, FunDocAvailable.class), + new FunctionDef(FunDocAvailable.signatureWithOptions, FunDocAvailable.class), new FunctionDef(FunDocumentURI.FS_DOCUMENT_URI_0, FunDocumentURI.class), new FunctionDef(FunDocumentURI.FS_DOCUMENT_URI_1, FunDocumentURI.class), new FunctionDef(FunElementWithId.FS_ELEMENT_WITH_ID_SIGNATURES[0], FunElementWithId.class), @@ -178,6 +180,7 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FnOuterMost.FNS_OUTERMOST, FnOuterMost.class), new FunctionDef(FunPath.FS_PATH_SIGNATURES[0], FunPath.class), new
FunctionDef(FunPath.FS_PATH_SIGNATURES[1], FunPath.class), + new FunctionDef(FunPath.FS_PATH_SIGNATURES[2], FunPath.class), new FunctionDef(FunPosition.signature, FunPosition.class), new FunctionDef(FunQName.signature, FunQName.class), new FunctionDef(FunRemove.signature, FunRemove.class), @@ -190,6 +193,7 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunRoot.signatures[1], FunRoot.class), new FunctionDef(FunRound.FN_ROUND_SIGNATURES[0], FunRound.class), new FunctionDef(FunRound.FN_ROUND_SIGNATURES[1], FunRound.class), + new FunctionDef(FunRound.FN_ROUND_SIGNATURES[2], FunRound.class), new FunctionDef(FunRoundHalfToEven.FN_ROUND_HALF_TO_EVEN_SIGNATURES[0], FunRoundHalfToEven.class), new FunctionDef(FunRoundHalfToEven.FN_ROUND_HALF_TO_EVEN_SIGNATURES[1], FunRoundHalfToEven.class), new FunctionDef(FunSerialize.signatures[0], FunSerialize.class), @@ -240,8 +244,10 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunEquals.signatures[1], FunEquals.class), new FunctionDef(FunAnalyzeString.signatures[0], FunAnalyzeString.class), new FunctionDef(FunAnalyzeString.signatures[1], FunAnalyzeString.class), - new FunctionDef(FunHeadTail.signatures[0], FunHeadTail.class), - new FunctionDef(FunHeadTail.signatures[1], FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_HEAD, FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_TAIL, FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_FOOT, FunHeadTail.class), + new FunctionDef(FunHeadTail.FN_TRUNK, FunHeadTail.class), new FunctionDef(FunHigherOrderFun.FN_FOR_EACH, FunHigherOrderFun.class), new FunctionDef(FunHigherOrderFun.FN_FOR_EACH_PAIR, FunHigherOrderFun.class), new FunctionDef(FunHigherOrderFun.FN_FILTER, FunHigherOrderFun.class), @@ -252,6 +258,8 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunEnvironment.signature[1], FunEnvironment.class), new FunctionDef(ParsingFunctions.signatures[0], ParsingFunctions.class), new 
FunctionDef(ParsingFunctions.signatures[1], ParsingFunctions.class), + new FunctionDef(ParsingFunctions.signatures[2], ParsingFunctions.class), + new FunctionDef(ParsingFunctions.signatures[3], ParsingFunctions.class), new FunctionDef(JSON.FS_PARSE_JSON[0], JSON.class), new FunctionDef(JSON.FS_PARSE_JSON[1], JSON.class), new FunctionDef(JSON.FS_JSON_DOC[0], JSON.class), @@ -272,7 +280,126 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FnRandomNumberGenerator.FS_RANDOM_NUMBER_GENERATOR[0], FnRandomNumberGenerator.class), new FunctionDef(FnRandomNumberGenerator.FS_RANDOM_NUMBER_GENERATOR[1], FnRandomNumberGenerator.class), new FunctionDef(FunContainsToken.FS_CONTAINS_TOKEN[0], FunContainsToken.class), - new FunctionDef(FunContainsToken.FS_CONTAINS_TOKEN[1], FunContainsToken.class) + new FunctionDef(FunContainsToken.FS_CONTAINS_TOKEN[1], FunContainsToken.class), + // XQuery 4.0 functions + new FunctionDef(FnIdentityVoid.FN_IDENTITY, FnIdentityVoid.class), + new FunctionDef(FnIdentityVoid.FN_VOID[0], FnIdentityVoid.class), + new FunctionDef(FnIdentityVoid.FN_VOID[1], FnIdentityVoid.class), + new FunctionDef(FnIsNaN.FN_IS_NAN, FnIsNaN.class), + new FunctionDef(FnCharacters.FN_CHARACTERS, FnCharacters.class), + new FunctionDef(FnGraphemes.FN_GRAPHEMES, FnGraphemes.class), + new FunctionDef(FnParseHtml.FN_PARSE_HTML[0], FnParseHtml.class), + new FunctionDef(FnParseHtml.FN_PARSE_HTML[1], FnParseHtml.class), + new FunctionDef(FnCollation.FN_COLLATION[0], FnCollation.class), + new FunctionDef(FnCollation.FN_COLLATION[1], FnCollation.class), + new FunctionDef(FnCollation.FN_COLLATION_AVAILABLE, FnCollation.class), + new FunctionDef(FnHtmlDoc.FN_HTML_DOC, FnHtmlDoc.class), + new FunctionDef(FnUnparsedBinary.FN_UNPARSED_BINARY, FnUnparsedBinary.class), + new FunctionDef(FnSchemaType.FN_SCHEMA_TYPE, FnSchemaType.class), + new FunctionDef(FnElementToMapPlan.FN_ELEMENT_TO_MAP_PLAN, FnElementToMapPlan.class), + new FunctionDef(FnGet.FN_GET, FnGet.class), 
+ new FunctionDef(FnFunctionAnnotations.FN_FUNCTION_ANNOTATIONS, FnFunctionAnnotations.class), + new FunctionDef(FnFunctionIdentity.FN_FUNCTION_IDENTITY, FnFunctionIdentity.class), + new FunctionDef(FnDateTimeParts.FN_BUILD_DATETIME, FnDateTimeParts.class), + new FunctionDef(FnDateTimeParts.FN_PARTS_OF_DATETIME, FnDateTimeParts.class), + new FunctionDef(FnReplicate.FN_REPLICATE, FnReplicate.class), + new FunctionDef(FnInsertSeparator.FN_INSERT_SEPARATOR, FnInsertSeparator.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_EQUAL[0], FnAllEqualDifferent.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_EQUAL[1], FnAllEqualDifferent.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_DIFFERENT[0], FnAllEqualDifferent.class), + new FunctionDef(FnAllEqualDifferent.FN_ALL_DIFFERENT[1], FnAllEqualDifferent.class), + new FunctionDef(FnItemsAt.FN_ITEMS_AT, FnItemsAt.class), + new FunctionDef(FnHigherOrderFun40.FN_INDEX_WHERE, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_TAKE_WHILE, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_WHILE_DO, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_DO_UNTIL, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_SORT_WITH, FnHigherOrderFun40.class), + new FunctionDef(FnSlice.FN_SLICE[0], FnSlice.class), + new FunctionDef(FnSlice.FN_SLICE[1], FnSlice.class), + new FunctionDef(FnSlice.FN_SLICE[2], FnSlice.class), + new FunctionDef(FnSlice.FN_SLICE[3], FnSlice.class), + new FunctionDef(FnDuplicateValues.FN_DUPLICATE_VALUES[0], FnDuplicateValues.class), + new FunctionDef(FnDuplicateValues.FN_DUPLICATE_VALUES[1], FnDuplicateValues.class), + new FunctionDef(FnHash.FN_HASH[0], FnHash.class), + new FunctionDef(FnHash.FN_HASH[1], FnHash.class), + new FunctionDef(FnHash.FN_HASH[2], FnHash.class), + new FunctionDef(FnOp.FN_OP, FnOp.class), + new FunctionDef(FnChar.FN_CHAR, FnChar.class), + new FunctionDef(FnAtomicEqual.FN_ATOMIC_EQUAL, FnAtomicEqual.class), + new 
FunctionDef(FnExpandedQName.FN_EXPANDED_QNAME, FnExpandedQName.class), + new FunctionDef(FnHighestLowest.FN_HIGHEST[0], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_HIGHEST[1], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_HIGHEST[2], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_LOWEST[0], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_LOWEST[1], FnHighestLowest.class), + new FunctionDef(FnHighestLowest.FN_LOWEST[2], FnHighestLowest.class), + new FunctionDef(FnPartition.FN_PARTITION, FnPartition.class), + new FunctionDef(FnParseUri.FN_PARSE_URI[0], FnParseUri.class), + new FunctionDef(FnParseUri.FN_PARSE_URI[1], FnParseUri.class), + new FunctionDef(FnBuildUri.FN_BUILD_URI[0], FnBuildUri.class), + new FunctionDef(FnBuildUri.FN_BUILD_URI[1], FnBuildUri.class), + new FunctionDef(FnHigherOrderFun40.FN_SCAN_LEFT, FnHigherOrderFun40.class), + new FunctionDef(FnHigherOrderFun40.FN_SCAN_RIGHT, FnHigherOrderFun40.class), + // XQuery 4.0 functions — batch 1: HOFs and subsequence matching + new FunctionDef(FnEverySome.FN_EVERY[0], FnEverySome.class), + new FunctionDef(FnEverySome.FN_EVERY[1], FnEverySome.class), + new FunctionDef(FnEverySome.FN_SOME[0], FnEverySome.class), + new FunctionDef(FnEverySome.FN_SOME[1], FnEverySome.class), + new FunctionDef(FnSortBy.FN_SORT_BY, FnSortBy.class), + new FunctionDef(FnPartialApply.FN_PARTIAL_APPLY, FnPartialApply.class), + new FunctionDef(FnSubsequenceMatching.FN_CONTAINS_SUBSEQUENCE[0], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_CONTAINS_SUBSEQUENCE[1], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_STARTS_WITH_SUBSEQUENCE[0], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_STARTS_WITH_SUBSEQUENCE[1], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_ENDS_WITH_SUBSEQUENCE[0], FnSubsequenceMatching.class), + new FunctionDef(FnSubsequenceMatching.FN_ENDS_WITH_SUBSEQUENCE[1], 
FnSubsequenceMatching.class), + // XQuery 4.0 functions — batch 2: string/number/URI + new FunctionDef(FnDecodeFromUri.FN_DECODE_FROM_URI, FnDecodeFromUri.class), + new FunctionDef(FnParseInteger.FN_PARSE_INTEGER[0], FnParseInteger.class), + new FunctionDef(FnParseInteger.FN_PARSE_INTEGER[1], FnParseInteger.class), + new FunctionDef(FnDivideDecimals.FN_DIVIDE_DECIMALS[0], FnDivideDecimals.class), + new FunctionDef(FnDivideDecimals.FN_DIVIDE_DECIMALS[1], FnDivideDecimals.class), + // XQuery 4.0 functions — batch 3: node and type + new FunctionDef(FnDistinctOrderedNodes.FN_DISTINCT_ORDERED_NODES, FnDistinctOrderedNodes.class), + new FunctionDef(FnSiblings.FN_SIBLINGS[0], FnSiblings.class), + new FunctionDef(FnSiblings.FN_SIBLINGS[1], FnSiblings.class), + new FunctionDef(FnTypeOf.FN_TYPE_OF, FnTypeOf.class), + // XQuery 4.0 functions — batch 4: date/time and misc + new FunctionDef(FnUnixDateTime.FN_UNIX_DATETIME[0], FnUnixDateTime.class), + new FunctionDef(FnUnixDateTime.FN_UNIX_DATETIME[1], FnUnixDateTime.class), + new FunctionDef(FnMessage.FN_MESSAGE[0], FnMessage.class), + new FunctionDef(FnMessage.FN_MESSAGE[1], FnMessage.class), + // XQuery 4.0 functions — batch 2 (continued): parse-QName + new FunctionDef(FnParseQName.FN_PARSE_QNAME, FnParseQName.class), + // XQuery 4.0 functions — batch 3 (continued): type annotation + new FunctionDef(FnTypeAnnotation.FN_ATOMIC_TYPE_ANNOTATION, FnTypeAnnotation.class), + new FunctionDef(FnTypeAnnotation.FN_NODE_TYPE_ANNOTATION, FnTypeAnnotation.class), + // XQuery 4.0 functions — batch 4 (continued): civil-timezone + new FunctionDef(FnCivilTimezone.FN_CIVIL_TIMEZONE[0], FnCivilTimezone.class), + new FunctionDef(FnCivilTimezone.FN_CIVIL_TIMEZONE[1], FnCivilTimezone.class), + // XQuery 4.0 functions — batch 5: CSV functions + new FunctionDef(CsvFunctions.FN_CSV_TO_ARRAYS[0], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_TO_ARRAYS[1], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_PARSE_CSV[0], 
CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_PARSE_CSV[1], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_TO_XML[0], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_TO_XML[1], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_DOC[0], CsvFunctions.class), + new FunctionDef(CsvFunctions.FN_CSV_DOC[1], CsvFunctions.class), + // XQuery 4.0 functions — batch 6: subsequence-where, seconds, in-scope-namespaces + new FunctionDef(FnSubsequenceWhere.FN_SUBSEQUENCE_WHERE[0], FnSubsequenceWhere.class), + new FunctionDef(FnSubsequenceWhere.FN_SUBSEQUENCE_WHERE[1], FnSubsequenceWhere.class), + new FunctionDef(FnSeconds.FN_SECONDS, FnSeconds.class), + new FunctionDef(FnInScopeNamespaces.FN_IN_SCOPE_NAMESPACES, FnInScopeNamespaces.class), + // XQuery 4.0 functions — batch 7: transitive-closure, element-to-map + new FunctionDef(FnTransitiveClosure.FN_TRANSITIVE_CLOSURE, FnTransitiveClosure.class), + new FunctionDef(FnElementToMap.FN_ELEMENT_TO_MAP[0], FnElementToMap.class), + new FunctionDef(FnElementToMap.FN_ELEMENT_TO_MAP[1], FnElementToMap.class), + + // --- Invisible XML (feature/fn-invisible-xml) --- + new FunctionDef(FnInvisibleXml.SIGNATURES[0], FnInvisibleXml.class), + new FunctionDef(FnInvisibleXml.SIGNATURES[1], FnInvisibleXml.class) + // --- End Invisible XML --- }; static { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnOp.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnOp.java new file mode 100644 index 00000000000..f0785604f57 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnOp.java @@ -0,0 +1,404 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the 
License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.dom.QName; +import org.exist.xquery.AbstractExpression; +import org.exist.xquery.AnalyzeContextInfo; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Function; +import org.exist.xquery.FunctionCall; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.UserDefinedFunction; +import org.exist.xquery.ValueComparison; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.BooleanValue; +import org.exist.xquery.value.ComputableValue; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.FunctionReturnSequenceType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements fn:op (XQuery 4.0). + * + * Returns a function reference for a named operator. 
+ */
+public class FnOp extends BasicFunction {
+
+    // Names of the two parameters of the generated operator function; the generated body
+    // looks its operands up under these names.
+    private static final QName PARAM_A = new QName("a", javax.xml.XMLConstants.NULL_NS_URI);
+    private static final QName PARAM_B = new QName("b", javax.xml.XMLConstants.NULL_NS_URI);
+
+    /** Signature of {@code fn:op}: takes the operator name and returns a function item. */
+    public static final FunctionSignature FN_OP = new FunctionSignature(
+            new QName("op", Function.BUILTIN_FUNCTION_NS),
+            "Returns a function that applies a given operator.",
+            new SequenceType[] {
+                    new FunctionParameterSequenceType("operator", Type.STRING, Cardinality.EXACTLY_ONE, "The operator name")
+            },
+            new FunctionReturnSequenceType(Type.FUNCTION, Cardinality.EXACTLY_ONE, "a function implementing the operator"));
+
+    public FnOp(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    // NOTE(review): this code is not referenced anywhere in this class; an invalid operator
+    // name is reported as XPTY0004 by eval() below. Confirm which code is intended.
+    private static final ErrorCodes.ErrorCode FOAP0001 = new ErrorCodes.ErrorCode(
+            "FOAP0001", "Invalid operator name");
+
+    /**
+     * Builds a two-parameter function ({@code $a}, {@code $b}) whose body applies the named
+     * operator to its arguments, and returns a reference to it.
+     *
+     * @param args {@code args[0]} holds the operator name
+     * @param contextSequence not used by this function
+     * @return a function reference wrapping the generated operator function
+     * @throws XPathException with XPTY0004 if the name is not accepted by {@link #isValidOperator}
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final String operator = args[0].getStringValue();
+
+        // Validate operator name
+        if (!isValidOperator(operator)) {
+            throw new XPathException(this, ErrorCodes.XPTY0004,
+                    "Unknown operator: " + operator);
+        }
+
+        // Create a UserDefinedFunction with 2 parameters ($a, $b)
+        final FunctionSignature opSig = new FunctionSignature(
+                new QName("op#" + operator, Function.BUILTIN_FUNCTION_NS),
+                new SequenceType[] {
+                        new FunctionParameterSequenceType("a", Type.ITEM, Cardinality.ZERO_OR_MORE, "left operand"),
+                        new FunctionParameterSequenceType("b", Type.ITEM, Cardinality.ZERO_OR_MORE, "right operand")
+                },
+                new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE, "result"));
+
+        final UserDefinedFunction func = new UserDefinedFunction(context, opSig);
+        func.addVariable(PARAM_A);
+        func.addVariable(PARAM_B);
+
+        // Set the body to an expression that evaluates the operator
+        func.setFunctionBody(new OperatorExpression(context, operator));
+
+        final FunctionCall call = new FunctionCall(context, func);
+        call.setLocation(getLine(), getColumn());
+
+        return new FunctionReference(this, call);
+    }
+
+    /** Returns true if {@code op} is one of the operator names this function can dispatch on. */
+    private boolean isValidOperator(final String op) {
+        switch (op) {
+            case ",": case "and": case "or":
+            case "+": case "-": case "*": case "div": case "idiv": case "mod":
+            case "=": case "<": case "<=": case ">": case ">=": case "!=":
+            case "eq": case "lt": case "le": case "gt": case "ge": case "ne":
+            case "<<": case ">>": case "precedes": case "follows":
+            case "precedes-or-is": case "follows-or-is":
+            case "is": case "is-not":
+            case "||": case "|": case "union": case "except": case "intersect":
+            case "to": case "otherwise":
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Expression that evaluates an operator on two variables $a and $b
+     * from the local variable context.
+     */
+    private static class OperatorExpression extends AbstractExpression {
+
+        private final String operator;
+
+        OperatorExpression(final XQueryContext context, final String operator) {
+            super(context);
+            this.operator = operator;
+        }
+
+        /**
+         * Resolves {@code $a} and {@code $b} and dispatches on the operator name.
+         *
+         * @throws XPathException if an operand is not acceptable for the operator, or if the
+         *         operator name is not one of the handled cases
+         */
+        @Override
+        public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException {
+            final Sequence a = context.resolveVariable(PARAM_A).getValue();
+            final Sequence b = context.resolveVariable(PARAM_B).getValue();
+
+            switch (operator) {
+                // Arithmetic
+                case "+": return arithmetic(a, b, "plus");
+                case "-": return arithmetic(a, b, "minus");
+                case "*": return arithmetic(a, b, "mult");
+                case "div": return arithmetic(a, b, "div");
+                case "idiv": return arithmetic(a, b, "idiv");
+                case "mod": return arithmetic(a, b, "mod");
+
+                // General comparison
+                case "=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.EQ);
+                case "!=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.NEQ);
+                case "<": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.LT);
+                case "<=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.LTEQ);
+                case ">": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.GT);
+                case ">=": return generalCompare(a, b, org.exist.xquery.Constants.Comparison.GTEQ);
+
+                // Value comparison
+                case "eq": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.EQ);
+                case "ne": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.NEQ);
+                case "lt": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.LT);
+                case "le": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.LTEQ);
+                case "gt": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.GT);
+                case "ge": return valueCompare(a, b, org.exist.xquery.Constants.Comparison.GTEQ);
+
+                // Boolean
+                case "and": return BooleanValue.valueOf(a.effectiveBooleanValue() && b.effectiveBooleanValue());
+                case "or": return BooleanValue.valueOf(a.effectiveBooleanValue() || b.effectiveBooleanValue());
+
+                // String concatenation
+                case "||": return new StringValue(this, a.getStringValue() + b.getStringValue());
+
+                // Sequence
+                case ",": return opComma(a, b);
+                case "|":
+                case "union": return opVenn(a, b, "union");
+                case "except": return opVenn(a, b, "except");
+                case "intersect": return opVenn(a, b, "intersect");
+                case "to": return opTo(a, b);
+                case "otherwise": return a.isEmpty() ? b : a;
+
+                // Node comparison
+                case "is": return nodeIs(a, b);
+                case "is-not": return nodeIsNot(a, b);
+                case "<<":
+                case "precedes": return nodePrecedes(a, b);
+                case ">>":
+                case "follows": return nodeFollows(a, b);
+                case "precedes-or-is": return nodePrecedesOrIs(a, b);
+                case "follows-or-is": return nodeFollowsOrIs(a, b);
+
+                default:
+                    // Same code as the up-front validation in FnOp.eval(); the JSON error code
+                    // used here previously did not describe this condition.
+                    throw new XPathException(this, ErrorCodes.XPTY0004, "Unknown operator: " + operator);
+            }
+        }
+
+        /**
+         * Applies an arithmetic operator to two atomized singleton operands.
+         *
+         * @param op one of "plus", "minus", "mult", "div", "idiv", "mod"
+         * @return the empty sequence if either operand is empty, otherwise the computed value
+         * @throws XPathException with XPTY0004 if an operand has more than one item or a type
+         *         that cannot take part in the operation
+         */
+        private Sequence arithmetic(final Sequence a, final Sequence b, final String op) throws XPathException {
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            // Reject longer sequences instead of silently computing with the first item only,
+            // in line with valueCompare().
+            if (a.getItemCount() > 1 || b.getItemCount() > 1) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Arithmetic requires singleton operands");
+            }
+            final ComputableValue left = toComputable(a.itemAt(0).atomize());
+            final ComputableValue right = toComputable(b.itemAt(0).atomize());
+            switch (op) {
+                case "plus": return left.plus(right);
+                case "minus": return left.minus(right);
+                case "mult": return left.mult(right);
+                case "div": return left.div(right);
+                case "idiv": return toNumeric(left, op).idiv(toNumeric(right, op));
+                case "mod": return toNumeric(left, op).mod(toNumeric(right, op));
+                default: throw new IllegalStateException();
+            }
+        }
+
+        /**
+         * Narrows a computable value to a numeric one for idiv/mod, raising XPTY0004 rather
+         * than a ClassCastException when the value is computable but not numeric.
+         */
+        private org.exist.xquery.value.NumericValue toNumeric(final ComputableValue v, final String op) throws XPathException {
+            if (v instanceof org.exist.xquery.value.NumericValue) {
+                return (org.exist.xquery.value.NumericValue) v;
+            }
+            throw new XPathException(this, ErrorCodes.XPTY0004,
+                    "Cannot use " + Type.getTypeName(v.getType()) + " with " + op);
+        }
+
+        /** Returns true if any pair of atomized items from {@code a} and {@code b} satisfies {@code comp}. */
+        private Sequence generalCompare(final Sequence a, final Sequence b,
+                final org.exist.xquery.Constants.Comparison comp) throws XPathException {
+            // General comparison: existential semantics — true if any pair matches
+            if (a.isEmpty() || b.isEmpty()) {
+                return BooleanValue.FALSE;
+            }
+            final com.ibm.icu.text.Collator collator = context.getDefaultCollator();
+            for (int i = 0; i < a.getItemCount(); i++) {
+                final org.exist.xquery.value.AtomicValue lv = a.itemAt(i).atomize();
+                for (int j = 0; j < b.getItemCount(); j++) {
+                    final org.exist.xquery.value.AtomicValue rv = b.itemAt(j).atomize();
+                    if (ValueComparison.compareAtomic(collator, lv, rv,
+                            org.exist.xquery.Constants.StringTruncationOperator.NONE, comp)) {
+                        return BooleanValue.TRUE;
+                    }
+                }
+            }
+            return BooleanValue.FALSE;
+        }
+
+        /**
+         * Compares two singleton operands; returns the empty sequence if either is empty and
+         * raises XPTY0004 if either has more than one item.
+         */
+        private Sequence valueCompare(final Sequence a, final Sequence b,
+                final org.exist.xquery.Constants.Comparison comp) throws XPathException {
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            if (a.getItemCount() > 1 || b.getItemCount() > 1) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Value comparison requires singleton operands");
+            }
+            final org.exist.xquery.value.AtomicValue lv = a.itemAt(0).atomize();
+            final org.exist.xquery.value.AtomicValue rv = b.itemAt(0).atomize();
+            final com.ibm.icu.text.Collator collator = context.getDefaultCollator();
+            return BooleanValue.valueOf(ValueComparison.compareAtomic(collator, lv, rv,
+                    org.exist.xquery.Constants.StringTruncationOperator.NONE, comp));
+        }
+
+        /** Concatenates the two operands into one sequence, {@code a} first. */
+        private Sequence opComma(final Sequence a, final Sequence b) throws XPathException {
+            final ValueSequence result = new ValueSequence(a.getItemCount() + b.getItemCount());
+            result.addAll(a);
+            result.addAll(b);
+            return result;
+        }
+
+        /**
+         * Applies a node-set operator ("union", "except" or "intersect").
+         *
+         * @throws XPathException with XPTY0004 if any item of either operand is not a node, or
+         *         if the underlying set operation fails
+         */
+        private Sequence opVenn(final Sequence a, final Sequence b, final String op) throws XPathException {
+            // Check that operands are nodes
+            for (int i = 0; i < a.getItemCount(); i++) {
+                if (!(a.itemAt(i) instanceof org.w3c.dom.Node)) {
+                    throw new XPathException(this, ErrorCodes.XPTY0004,
+                            "Set operation requires node operands, got " + Type.getTypeName(a.itemAt(i).getType()));
+                }
+            }
+            for (int i = 0; i < b.getItemCount(); i++) {
+                if (!(b.itemAt(i) instanceof org.w3c.dom.Node)) {
+                    throw new XPathException(this, ErrorCodes.XPTY0004,
+                            "Set operation requires node operands, got " + Type.getTypeName(b.itemAt(i).getType()));
+                }
+            }
+            try {
+                switch (op) {
+                    case "union": return a.toNodeSet().union(b.toNodeSet());
+                    case "except": return a.toNodeSet().except(b.toNodeSet());
+                    case "intersect": return a.toNodeSet().intersection(b.toNodeSet());
+                    default: throw new IllegalStateException();
+                }
+            } catch (final XPathException e) {
+                // NOTE(review): only the message survives; the original exception is not chained.
+                throw new XPathException(this, ErrorCodes.XPTY0004, e.getMessage());
+            }
+        }
+
+        /**
+         * Builds the integer range from {@code a} to {@code b}, both inclusive.
+         *
+         * @return the empty sequence if either operand is empty or the start exceeds the end
+         * @throws XPathException with XPTY0004 if an operand is not a single integer
+         */
+        private Sequence opTo(final Sequence a, final Sequence b) throws XPathException {
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            final long start = toRangeBound(a);
+            final long end = toRangeBound(b);
+            if (start > end) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            // Presize only when the item count fits an int. A negative span means the
+            // subtraction overflowed; casting such a count to int used to produce a bogus,
+            // possibly negative, capacity.
+            final long span = end - start;
+            final ValueSequence result = (span >= 0 && span < Integer.MAX_VALUE)
+                    ? new ValueSequence((int) span + 1)
+                    : new ValueSequence();
+            // Terminate on equality rather than "i <= end", which never becomes false when
+            // end is Long.MAX_VALUE because the increment wraps around.
+            for (long i = start; ; i++) {
+                result.add(new IntegerValue(this, i));
+                if (i == end) {
+                    break;
+                }
+            }
+            return result;
+        }
+
+        /**
+         * Extracts one bound of a range from a non-empty operand: the operand must be a
+         * singleton whose atomized value is an integer, or untyped atomic (which is converted
+         * to an integer). Anything else raises XPTY0004 instead of a ClassCastException.
+         */
+        private long toRangeBound(final Sequence operand) throws XPathException {
+            if (operand.getItemCount() > 1) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Range expression requires singleton operands");
+            }
+            org.exist.xquery.value.AtomicValue value = operand.itemAt(0).atomize();
+            if (value.getType() == Type.UNTYPED_ATOMIC) {
+                value = value.convertTo(Type.INTEGER);
+            }
+            if (!(value instanceof IntegerValue)) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Range expression requires integer operands, got " + Type.getTypeName(value.getType()));
+            }
+            return ((IntegerValue) value).getLong();
+        }
+
+        /**
+         * Verifies that each operand of a node comparison is empty or a single node.
+         *
+         * @throws XPathException with XPTY0004 otherwise
+         */
+        private void checkNodeOperands(final Sequence a, final Sequence b) throws XPathException {
+            // Reject longer sequences instead of silently comparing the first items only.
+            if (a.getItemCount() > 1 || b.getItemCount() > 1) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Node comparison requires singleton operands");
+            }
+            if (!a.isEmpty() && !(a.itemAt(0) instanceof org.w3c.dom.Node)) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Node comparison requires node operands, got " + Type.getTypeName(a.itemAt(0).getType()));
+            }
+            if (!b.isEmpty() && !(b.itemAt(0) instanceof org.w3c.dom.Node)) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "Node comparison requires node operands, got " + Type.getTypeName(b.itemAt(0).getType()));
+            }
+        }
+
+        /** Implements "is": equality of the two nodes as decided by their {@code equals}. */
+        private Sequence nodeIs(final Sequence a, final Sequence b) throws XPathException {
+            checkNodeOperands(a, b);
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            return BooleanValue.valueOf(a.itemAt(0).equals(b.itemAt(0)));
+        }
+
+        /** Implements "is-not": the negation of {@link #nodeIs}. */
+        private Sequence nodeIsNot(final Sequence a, final Sequence b) throws XPathException {
+            checkNodeOperands(a, b);
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            return BooleanValue.valueOf(!a.itemAt(0).equals(b.itemAt(0)));
+        }
+
+        /**
+         * Returns the value as a computable one, converting untyped atomic values to double.
+         *
+         * @throws XPathException with XPTY0004 for any other non-computable value
+         */
+        private ComputableValue toComputable(final org.exist.xquery.value.AtomicValue v) throws XPathException {
+            if (v instanceof ComputableValue) {
+                return (ComputableValue) v;
+            }
+            // Untyped atomic → promote to xs:double for arithmetic
+            if (v.getType() == Type.UNTYPED_ATOMIC) {
+                return (ComputableValue) v.convertTo(Type.DOUBLE);
+            }
+            throw new XPathException(this, ErrorCodes.XPTY0004,
+                    "Cannot use " + Type.getTypeName(v.getType()) + " in arithmetic");
+        }
+
+        /**
+         * Orders the first items of two operands that the caller has already checked to be
+         * non-empty single nodes.
+         *
+         * @return a negative, zero or positive number as the left node sorts before, equal to
+         *         or after the right node
+         * @throws XPathException with XPTY0004 if the two nodes are not both persistent or
+         *         both in-memory nodes
+         */
+        private int nodeCompare(final Sequence a, final Sequence b) throws XPathException {
+            final Item left = a.itemAt(0);
+            final Item right = b.itemAt(0);
+            if (left instanceof org.exist.dom.persistent.NodeProxy && right instanceof org.exist.dom.persistent.NodeProxy) {
+                return ((org.exist.dom.persistent.NodeProxy) left).compareTo((org.exist.dom.persistent.NodeProxy) right);
+            }
+            // For in-memory nodes, compare by node number.
+            // NOTE(review): the node numbers are compared without checking that both nodes
+            // belong to the same in-memory document — confirm the ordering across documents.
+            if (left instanceof org.exist.dom.memtree.NodeImpl && right instanceof org.exist.dom.memtree.NodeImpl) {
+                final org.exist.dom.memtree.NodeImpl leftNode = (org.exist.dom.memtree.NodeImpl) left;
+                final org.exist.dom.memtree.NodeImpl rightNode = (org.exist.dom.memtree.NodeImpl) right;
+                return Integer.compare(leftNode.getNodeNumber(), rightNode.getNodeNumber());
+            }
+            throw new XPathException(this, ErrorCodes.XPTY0004,
+                    "Node comparison requires node operands");
+        }
+
+        /** Implements "&lt;&lt;" / "precedes". */
+        private Sequence nodePrecedes(final Sequence a, final Sequence b) throws XPathException {
+            checkNodeOperands(a, b);
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            return BooleanValue.valueOf(nodeCompare(a, b) < 0);
+        }
+
+        /** Implements "&gt;&gt;" / "follows". */
+        private Sequence nodeFollows(final Sequence a, final Sequence b) throws XPathException {
+            checkNodeOperands(a, b);
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            return BooleanValue.valueOf(nodeCompare(a, b) > 0);
+        }
+
+        /** Implements "precedes-or-is". */
+        private Sequence nodePrecedesOrIs(final Sequence a, final Sequence b) throws XPathException {
+            checkNodeOperands(a, b);
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            return BooleanValue.valueOf(nodeCompare(a, b) <= 0);
+        }
+
+        /** Implements "follows-or-is". */
+        private Sequence nodeFollowsOrIs(final Sequence a, final Sequence b) throws XPathException {
+            checkNodeOperands(a, b);
+            if (a.isEmpty() || b.isEmpty()) {
+                return Sequence.EMPTY_SEQUENCE;
+            }
+            return BooleanValue.valueOf(nodeCompare(a, b) >= 0);
+        }
+
+        @Override
+        public int returnsType() {
+            return Type.ITEM;
+        }
+
+        @Override
+        public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException {
+            // nothing to analyze
+        }
+
+        @Override
+        public void dump(final ExpressionDumper dumper) {
+            dumper.display("op(\"" + operator + "\")");
+        }
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseHtml.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseHtml.java new file mode 100644 index 00000000000..b7c0325aba9 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnParseHtml.java @@ -0,0 +1,177 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.functions.fn;
+
+import nu.validator.htmlparser.common.XmlViolationPolicy;
+import nu.validator.htmlparser.sax.HtmlParser;
+import org.exist.Namespaces;
+import org.exist.dom.QName;
+import org.exist.dom.memtree.SAXAdapter;
+import org.exist.xquery.*;
+import org.exist.xquery.functions.map.MapType;
+import org.exist.xquery.value.*;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.charset.Charset;
+
+/**
+ * Implementation of fn:parse-html (XQuery 4.0).
+ *
+ * Takes HTML, which does not have to be well-formed, and turns it into an XDM
+ * document node whose elements live in the XHTML namespace.
+ */
+public class FnParseHtml extends BasicFunction {
+
+    /**
+     * The two arities of fn:parse-html: the input alone, and the input plus an options map.
+     */
+    public static final FunctionSignature[] FN_PARSE_HTML = {
+        new FunctionSignature(
+            new QName("parse-html", Function.BUILTIN_FUNCTION_NS),
+            "Parses the supplied HTML string into an XDM document node. " +
+            "The input need not be well-formed; it is processed by an HTML parser " +
+            "that corrects errors and produces well-formed XHTML output.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("value", Type.ITEM,
+                    Cardinality.ZERO_OR_ONE, "The HTML to parse (string or binary)")
+            },
+            new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE,
+                "The parsed XHTML document")),
+        new FunctionSignature(
+            new QName("parse-html", Function.BUILTIN_FUNCTION_NS),
+            "Parses the supplied HTML string into an XDM document node with options. " +
+            "The input need not be well-formed; it is processed by an HTML parser " +
+            "that corrects errors and produces well-formed XHTML output.",
+            new SequenceType[] {
+                new FunctionParameterSequenceType("value", Type.ITEM,
+                    Cardinality.ZERO_OR_ONE, "The HTML to parse (string or binary)"),
+                new FunctionParameterSequenceType("options", Type.MAP_ITEM,
+                    Cardinality.EXACTLY_ONE, "Options map")
+            },
+            new FunctionReturnSequenceType(Type.DOCUMENT, Cardinality.ZERO_OR_ONE,
+                "The parsed XHTML document"))
+    };
+
+    /**
+     * Creates the function instance for one of the {@link #FN_PARSE_HTML} signatures.
+     *
+     * @param context the XQuery context the function runs in
+     * @param signature the signature this instance was resolved against
+     */
+    public FnParseHtml(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Evaluates fn:parse-html.
+     *
+     * An empty first argument yields the empty sequence. When a non-empty options map is
+     * supplied, "fail-on-error" (default false) and "encoding" (default "UTF-8") are read
+     * from it, and the two template-content options are handed to validateOptionType.
+     *
+     * @param args the HTML input and, for the two-argument form, the options map
+     * @param contextSequence not used by this function
+     * @return the result of parsing the input, or the empty sequence for empty input
+     * @throws XPathException if reading the options, decoding the input or parsing fails
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final Sequence input = args[0];
+        if (input.isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final boolean failOnError;
+        final String encoding;
+        final boolean optionsSupplied = getArgumentCount() == 2 && !args[1].isEmpty();
+        if (optionsSupplied) {
+            final MapType options = (MapType) args[1].itemAt(0);
+            failOnError = getBooleanOption(options, "fail-on-error", false);
+            encoding = getStringOption(options, "encoding", "UTF-8");
+
+            // These two options are only type-checked here; their values are not used.
+            validateOptionType(options, "include-template-content");
+            validateOptionType(options, "exclude-template-content");
+        } else {
+            // No options map: fall back to the defaults.
+            failOnError = false;
+            encoding = "UTF-8";
+        }
+
+        // Turn the input item into text, then run it through the HTML parser.
+        return parseHtml(getHtmlContent(input.itemAt(0), encoding), failOnError);
+    }
+
+    /**
+     * Produces the HTML text carried by {@code item}.
+     *
+     * A binary value is read completely and decoded with the named charset; any other
+     * item contributes its string value.
+     *
+     * @param item the input item
+     * @param encoding the charset name used to decode a binary value
+     * @return the HTML as a string
+     * @throws XPathException FODC0006 if reading or decoding a binary value fails
+     */
+    private String getHtmlContent(final Item item, final String encoding) throws XPathException {
+        if (!(item instanceof BinaryValue)) {
+            return item.getStringValue();
+        }
+
+        final BinaryValue binary = (BinaryValue) item;
+        try (final java.io.InputStream is = binary.getInputStream()) {
+            // The charset lookup stays inside the try so that an unknown name is reported as FODC0006 too.
+            final Charset charset = Charset.forName(encoding);
+            final byte[] raw = is.readAllBytes();
+            return new String(raw, charset);
+        } catch (final Exception e) {
+            throw new XPathException(this, ErrorCodes.FODC0006,
+                    "Error decoding binary value: " + e.getMessage());
+        }
+    }
+
+    private Sequence parseHtml(final String htmlContent, final boolean failOnError) throws XPathException {
+        final SAXAdapter adapter = new SAXAdapter(this, context);
+
+        try {
+            // Use Validator.nu HTML5 parser — SAX-based, same pipeline as NekoHTML
+            // but follows the WHATWG HTML5 parsing algorithm. Outputs XHTML namespace
+            // by default, handles