diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/CollectionQueryParameters.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/CollectionQueryParameters.java new file mode 100644 index 00000000000..2d78d01d8a3 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/CollectionQueryParameters.java @@ -0,0 +1,237 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.xquery.ErrorCodes; +import org.exist.xquery.Expression; +import org.exist.xquery.XPathException; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Saxon-style query string parameters shared by {@link FunUriCollection} + * (database collections) and {@link ExtCollection} (file: URI collections). + * + *

Supports four parameters:

+ * + * + *

Each consumer specifies which subset of parameters it accepts via the {@code allowedKeys} + * argument to {@link #parse(String, Set, Expression)}. Unknown or invalid keys/values raise + * {@link ErrorCodes#FODC0004}.

+ */ +public final class CollectionQueryParameters { + + public static final String KEY_SELECT = "select"; + public static final String KEY_MATCH = "match"; + public static final String KEY_CONTENT_TYPE = "content-type"; + public static final String KEY_STABLE = "stable"; + + public static final String VALUE_CONTENT_TYPE_DOCUMENT = "application/vnd.existdb.document"; + public static final String VALUE_CONTENT_TYPE_DOCUMENT_BINARY = "application/vnd.existdb.document+binary"; + public static final String VALUE_CONTENT_TYPE_DOCUMENT_XML = "application/vnd.existdb.document+xml"; + public static final String VALUE_CONTENT_TYPE_SUBCOLLECTION = "application/vnd.existdb.collection"; + public static final String[] VALUE_CONTENT_TYPES = { + VALUE_CONTENT_TYPE_DOCUMENT, + VALUE_CONTENT_TYPE_DOCUMENT_BINARY, + VALUE_CONTENT_TYPE_DOCUMENT_XML, + VALUE_CONTENT_TYPE_SUBCOLLECTION + }; + + public static final String VALUE_STABLE_NO = "no"; + public static final String VALUE_STABLE_YES = "yes"; + public static final String[] VALUE_STABLES = { + VALUE_STABLE_NO, + VALUE_STABLE_YES + }; + + /** Keys accepted by fn:uri-collection (no select). */ + public static final Set URI_COLLECTION_KEYS = Set.of(KEY_MATCH, KEY_CONTENT_TYPE, KEY_STABLE); + + /** Keys accepted by fn:collection() with file: URIs (includes select). */ + public static final Set FILE_COLLECTION_KEYS = Set.of(KEY_SELECT, KEY_MATCH, KEY_CONTENT_TYPE, KEY_STABLE); + + @Nullable private final String select; + @Nullable private final String match; + @Nullable private final String contentType; + private final boolean stable; + private final Map rawMap; + + private CollectionQueryParameters(@Nullable final String select, + @Nullable final String match, + @Nullable final String contentType, + final boolean stable, + final Map rawMap) { + this.select = select; + this.match = match; + this.contentType = contentType; + this.stable = stable; + this.rawMap = rawMap; + } + + /** + * Parse the query string portion of a URI string. 
+ * + * @param uriOrQueryString the full URI string (with scheme/path) or just the query portion; + * if {@code null} or has no query, returns parameters with all defaults + * @param allowedKeys the set of accepted parameter keys; any other key raises FODC0004 + * @param caller the calling expression for error reporting + * @return parsed parameters + * @throws XPathException FODC0004 if a key is not in {@code allowedKeys} or a value is invalid + */ + public static CollectionQueryParameters parse(@Nullable final String uriOrQueryString, + final Set allowedKeys, + final Expression caller) throws XPathException { + final Map map = parseQueryString(uriOrQueryString); + validate(map, allowedKeys, caller); + + return new CollectionQueryParameters( + map.get(KEY_SELECT), + map.get(KEY_MATCH), + map.get(KEY_CONTENT_TYPE), + !map.containsKey(KEY_STABLE) || VALUE_STABLE_YES.equals(map.get(KEY_STABLE)), + map); + } + + /** Parse query parameters from the URI string into a key/value map. */ + private static Map parseQueryString(@Nullable final String uri) { + final Map map = new HashMap<>(); + if (uri == null) { + return map; + } + final int questionMarkIndex = uri.indexOf('?'); + if (questionMarkIndex < 0 || questionMarkIndex + 1 >= uri.length()) { + return map; + } + final String[] keyValuePairs = uri.substring(questionMarkIndex + 1).split("&"); + for (final String keyValuePair : keyValuePairs) { + final int equalIndex = keyValuePair.indexOf('='); + if (equalIndex >= 0) { + if (equalIndex + 1 < keyValuePair.length()) { + map.put(keyValuePair.substring(0, equalIndex).trim(), + keyValuePair.substring(equalIndex + 1).trim()); + } else { + map.put(keyValuePair.substring(0, equalIndex).trim(), ""); + } + } else { + map.put(keyValuePair.trim(), ""); + } + } + return map; + } + + private static void validate(final Map map, final Set allowedKeys, + final Expression caller) throws XPathException { + for (final Map.Entry entry : map.entrySet()) { + final String key = entry.getKey(); 
+ final String value = entry.getValue(); + + if (!allowedKeys.contains(key)) { + throw new XPathException(caller, ErrorCodes.FODC0004, + String.format("Unexpected query string \"%s\".", entry)); + } + + if (key.equals(KEY_CONTENT_TYPE)) { + if (Arrays.stream(VALUE_CONTENT_TYPES).noneMatch(v -> v.equals(value))) { + throw new XPathException(caller, ErrorCodes.FODC0004, + String.format("Invalid query-string value \"%s\".", entry)); + } + } else if (key.equals(KEY_STABLE)) { + if (Arrays.stream(VALUE_STABLES).noneMatch(v -> v.equals(value))) { + throw new XPathException(caller, ErrorCodes.FODC0004, + String.format("Invalid query-string value \"%s\".", entry)); + } + } + // KEY_SELECT and KEY_MATCH accept any string value + } + } + + /** + * Strip the {@code stable=...} parameter from a URI string for cache keying. + * Used by fn:uri-collection to cache results regardless of the {@code stable} setting. + */ + public static String stripStableParameter(final String uriWithQueryString) { + String result = uriWithQueryString.replaceAll( + String.format("%s\\s*=\\s*\\byes|no\\b\\s*&+", KEY_STABLE), ""); + if (result.endsWith("?")) { + result = result.substring(0, result.length() - 1); + } + return result; + } + + @Nullable + public String getSelect() { + return select; + } + + @Nullable + public String getMatch() { + return match; + } + + @Nullable + public String getContentType() { + return contentType; + } + + public boolean isStable() { + return stable; + } + + public boolean hasContentType() { + return contentType != null; + } + + /** True if the content-type filter selects (or includes) XML documents. */ + public boolean includesXmlDocuments() { + return contentType == null + || VALUE_CONTENT_TYPE_DOCUMENT.equals(contentType) + || VALUE_CONTENT_TYPE_DOCUMENT_XML.equals(contentType); + } + + /** True if the content-type filter selects (or includes) binary documents. 
*/ + public boolean includesBinaryDocuments() { + return contentType == null + || VALUE_CONTENT_TYPE_DOCUMENT.equals(contentType) + || VALUE_CONTENT_TYPE_DOCUMENT_BINARY.equals(contentType); + } + + /** True if the content-type filter selects (or includes) sub-collections. */ + public boolean includesSubcollections() { + return contentType == null + || VALUE_CONTENT_TYPE_SUBCOLLECTION.equals(contentType); + } + + /** Returns the raw key/value map of all query parameters that were present. */ + public Map getRawMap() { + return rawMap; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java index e298e084cc8..bbce1eb09d2 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java @@ -21,6 +21,8 @@ */ package org.exist.xquery.functions.fn; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.exist.collections.Collection; import org.exist.dom.QName; import org.exist.dom.persistent.DefaultDocumentSet; @@ -38,9 +40,21 @@ import org.exist.xquery.functions.xmldb.XMLDBModule; import org.exist.xquery.value.*; +import java.io.IOException; +import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Comparator; import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; + +import org.exist.xquery.util.DocUtils; import static org.exist.xquery.FunctionDSL.*; @@ -49,6 +63,8 @@ */ public class ExtCollection extends BasicFunction { + private static final Logger LOG = LogManager.getLogger(ExtCollection.class); + private static final String FS_COLLECTION_NAME = "collection"; static final 
FunctionSignature[] FS_COLLECTION = functionSignatures( new QName(FS_COLLECTION_NAME, Function.BUILTIN_FUNCTION_NS), @@ -78,13 +94,35 @@ public ExtCollection(final XQueryContext context, final FunctionSignature signat @Override public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { final URI collectionUri; + final String rawQueryString; if (args.length == 0 || args[0].isEmpty()) { collectionUri = null; + rawQueryString = null; } else { - collectionUri = asUri(args[0].itemAt(0).getStringValue()); + // Split off any Saxon-style query string before parsing the URI. + // The query may contain regex characters (^, [, ], +, etc.) that + // Java's URI class rejects, so we strip and pass it separately. + final String input = args[0].itemAt(0).getStringValue(); + final int q = input.indexOf('?'); + if (q >= 0) { + collectionUri = asUri(input.substring(0, q)); + rawQueryString = input.substring(q + 1); + } else { + collectionUri = asUri(input); + rawQueryString = null; + } } - return getCollectionItems(new URI[] { collectionUri }); + return getCollectionItems(collectionUri, rawQueryString); + } + + private Sequence getCollectionItems(final URI collectionUri, final String rawQueryString) throws XPathException { + if (collectionUri == null) { + return getDefaultCollectionItems(); + } + final Sequence result = new ValueSequence(); + getCollectionItems(collectionUri, rawQueryString, result); + return result; } protected Sequence getCollectionItems(final URI[] collectionUris) throws XPathException { @@ -95,7 +133,9 @@ protected Sequence getCollectionItems(final URI[] collectionUris) throws XPathEx final Sequence result = new ValueSequence(); for (final URI collectionUri : collectionUris) { - getCollectionItems(collectionUri, result); + // No raw query string from this code path; subclasses (e.g. 
FunXCollection) + // do not split off the query string before calling + getCollectionItems(collectionUri, null, result); } return result; } @@ -115,11 +155,15 @@ private Sequence getDefaultCollectionItems() throws XPathException { } } - private void getCollectionItems(final URI collectionUri, final Sequence items) throws XPathException { + private void getCollectionItems(final URI collectionUri, final String rawQueryString, final Sequence items) throws XPathException { final Sequence dynamicCollection = context.getDynamicallyAvailableCollection(collectionUri.toString()); if (dynamicCollection != null) { items.addAll(dynamicCollection); + } else if ("file".equals(collectionUri.getScheme())) { + // file: URI — scan directory for XML files + getFileCollectionItems(collectionUri, rawQueryString, items); + } else { final MutableDocumentSet ndocs = new DefaultDocumentSet(); final XmldbURI uri = XmldbURI.create(collectionUri); @@ -147,6 +191,96 @@ private void getCollectionItems(final URI collectionUri, final Sequence items) t } } + /** + * Scan a file: URI directory for documents and parse XML files into in-memory documents. + *

+ * Supports Saxon-style query string parameters (aligned with fn:uri-collection): + *

+ *
    + *
  • {@code select=*.xml} — glob pattern for filename matching (default: {@code *.xml})
  • + *
  • {@code match=regex} — additional regex filter on filenames
  • + *
  • {@code content-type=...} — MIME filter; for file: URIs: + * {@code application/vnd.existdb.document+xml} or {@code application/vnd.existdb.document} + * selects XML files (the only kind fn:collection returns)
  • + *
  • {@code stable=yes|no} — when {@code yes} (default), files are returned in alphabetical order
  • + *
+ *

+ * Only DBA users can access the file system directly. + *

+ */ + private void getFileCollectionItems(final URI collectionUri, final String rawQueryString, final Sequence items) throws XPathException { + // Security: only DBA users can access file: URIs + if (!context.getBroker().getCurrentSubject().hasDbaRole()) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Permission denied: only DBA users can access file: URIs in fn:collection()"); + } + + // Parse Saxon-style query parameters from the raw query string. + // We use rawQueryString (passed separately from the URI) because the query + // may contain regex characters like ^, [, ], +, $ that Java's URI class rejects. + final CollectionQueryParameters params = CollectionQueryParameters.parse( + rawQueryString != null ? "?" + rawQueryString : null, + CollectionQueryParameters.FILE_COLLECTION_KEYS, + this); + + // fn:collection() returns documents (XML), so a content-type that excludes XML + // would yield an empty result. Detect that early. + if (params.hasContentType() && !params.includesXmlDocuments()) { + return; + } + + // Default glob pattern is *.xml (XML files only). User-supplied select overrides. + final String globPattern = params.getSelect() != null ? params.getSelect() : "*.xml"; + + // Compile match regex if present + final Pattern matchPattern = (params.getMatch() != null && !params.getMatch().isEmpty()) + ? 
Pattern.compile(params.getMatch()) + : null; + + final Path dir = Paths.get(collectionUri.getPath()); + if (!Files.isDirectory(dir)) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Directory does not exist: " + dir); + } + + // Collect candidate files matching all filters + final List candidates = new ArrayList<>(); + try (final DirectoryStream stream = Files.newDirectoryStream(dir, globPattern)) { + for (final Path file : stream) { + if (!Files.isRegularFile(file) || !Files.isReadable(file)) { + continue; + } + if (matchPattern != null && !matchPattern.matcher(file.getFileName().toString()).find()) { + continue; + } + candidates.add(file); + } + } catch (final IOException e) { + throw new XPathException(this, ErrorCodes.FODC0002, + "Error reading directory: " + e.getMessage()); + } + + // Apply stable ordering (alphabetical by filename) when stable=yes (the default) + if (params.isStable()) { + candidates.sort(Comparator.comparing(p -> p.getFileName().toString())); + } + + // Parse each candidate as XML and add to items; skip non-parseable files + for (final Path file : candidates) { + try (final InputStream is = Files.newInputStream(file)) { + final org.exist.dom.memtree.DocumentImpl doc = + DocUtils.parse(context, is, this); + doc.setDocumentURI(file.toUri().toString()); + items.add(doc); + } catch (final XPathException | IOException e) { + // Skip non-parseable files (they may not be well-formed XML) + if (LOG.isDebugEnabled()) { + LOG.debug("Skipping non-parseable file in collection: {}", file, e); + } + } + } + } + protected URI asUri(final String path) throws XPathException { try { URI uri = new URI(path); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java index 3b9426af31e..5aefc380c45 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java +++ 
b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunUriCollection.java @@ -78,28 +78,6 @@ public class FunUriCollection extends BasicFunction { ) ); - private static final String KEY_CONTENT_TYPE = "content-type"; - private static final String VALUE_CONTENT_TYPE_DOCUMENT = "application/vnd.existdb.document"; - private static final String VALUE_CONTENT_TYPE_DOCUMENT_BINARY = "application/vnd.existdb.document+binary"; - private static final String VALUE_CONTENT_TYPE_DOCUMENT_XML = "application/vnd.existdb.document+xml"; - private static final String VALUE_CONTENT_TYPE_SUBCOLLECTION = "application/vnd.existdb.collection"; - private static final String[] VALUE_CONTENT_TYPES = { - VALUE_CONTENT_TYPE_DOCUMENT, - VALUE_CONTENT_TYPE_DOCUMENT_BINARY, - VALUE_CONTENT_TYPE_DOCUMENT_XML, - VALUE_CONTENT_TYPE_SUBCOLLECTION - }; - - private static final String KEY_STABLE = "stable"; - private static final String VALUE_STABLE_NO = "no"; - private static final String VALUE_STABLE_YES = "yes"; - private static final String[] VALUE_STABLES = { - VALUE_STABLE_NO, - VALUE_STABLE_YES - }; - - private static final String KEY_MATCH = "match"; - public FunUriCollection(final XQueryContext context, final FunctionSignature signature) { super(context, signature); } @@ -114,10 +92,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro final String uriWithQueryString = args[0].toString(); final int queryStringIndex = uriWithQueryString.indexOf('?'); final String uriWithoutQueryString = (queryStringIndex >= 0) ? 
uriWithQueryString.substring(0, queryStringIndex) : uriWithQueryString; - String uriWithoutStableQueryString = uriWithQueryString.replaceAll(String.format("%s\\s*=\\s*\\byes|no\\b\\s*&+", KEY_STABLE), ""); - if (uriWithoutStableQueryString.endsWith("?")) { - uriWithoutStableQueryString = uriWithoutStableQueryString.substring(0, uriWithoutStableQueryString.length() - 1); - } + final String uriWithoutStableQueryString = CollectionQueryParameters.stripStableParameter(uriWithQueryString); final XmldbURI uri; try { @@ -126,21 +101,15 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro throw new XPathException(this, ErrorCodes.FODC0004, String.format("\"%s\" is not a valid URI.", args[0].toString())); } - final Map queryStringMap = parseQueryString(uriWithQueryString); - checkQueryStringMap(queryStringMap); + final CollectionQueryParameters params = CollectionQueryParameters.parse( + uriWithQueryString, CollectionQueryParameters.URI_COLLECTION_KEYS, this); - if ((!queryStringMap.containsKey(KEY_STABLE) || queryStringMap.get(KEY_STABLE).equals(VALUE_STABLE_YES)) && - context.getCachedUriCollectionResults().containsKey(uriWithoutStableQueryString)) { + if (params.isStable() && context.getCachedUriCollectionResults().containsKey(uriWithoutStableQueryString)) { result = context.getCachedUriCollectionResults().get(uriWithoutStableQueryString); } else { - final boolean binaryUrisIncluded = !queryStringMap.containsKey(KEY_CONTENT_TYPE) || - (queryStringMap.get(KEY_CONTENT_TYPE).equals(VALUE_CONTENT_TYPE_DOCUMENT) || - queryStringMap.get(KEY_CONTENT_TYPE).equals(VALUE_CONTENT_TYPE_DOCUMENT_BINARY)); - final boolean subcollectionUrisIncluded = !queryStringMap.containsKey(KEY_CONTENT_TYPE) || - queryStringMap.get(KEY_CONTENT_TYPE).equals(VALUE_CONTENT_TYPE_SUBCOLLECTION); - final boolean xmlUrisIncluded = !queryStringMap.containsKey(KEY_CONTENT_TYPE) || - (queryStringMap.get(KEY_CONTENT_TYPE).equals(VALUE_CONTENT_TYPE_DOCUMENT) || - 
queryStringMap.get(KEY_CONTENT_TYPE).equals(VALUE_CONTENT_TYPE_DOCUMENT_XML)); + final boolean binaryUrisIncluded = params.includesBinaryDocuments(); + final boolean subcollectionUrisIncluded = params.includesSubcollections(); + final boolean xmlUrisIncluded = params.includesXmlDocuments(); try (final Collection collection = context.getBroker().openCollection(uri, Lock.LockMode.READ_LOCK)) { if (collection != null) { @@ -168,8 +137,8 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro throw new XPathException(this, ErrorCodes.FODC0002, e); } - if (queryStringMap.containsKey(KEY_MATCH) && !queryStringMap.get(KEY_MATCH).isEmpty()) { - final Pattern pattern = PatternFactory.getInstance().getPattern(queryStringMap.get(KEY_MATCH)); + if (params.getMatch() != null && !params.getMatch().isEmpty()) { + final Pattern pattern = PatternFactory.getInstance().getPattern(params.getMatch()); final List matchedResultUris = resultUris.stream().filter(resultUri -> pattern.matcher(resultUri).find()).collect(Collectors.toList()); if (matchedResultUris.isEmpty()) { result = Sequence.EMPTY_SEQUENCE; @@ -195,46 +164,4 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro return result; } - - private static Map parseQueryString(final String uri) { - final Map map = new HashMap<>(); - if (uri != null) { - final int questionMarkIndex = uri.indexOf('?'); - if (questionMarkIndex >= 0 && questionMarkIndex + 1 < uri.length()) { - String[] keyValuePairs = uri.substring(questionMarkIndex + 1).split("&"); - for (String keyValuePair : keyValuePairs) { - int equalIndex = keyValuePair.indexOf('='); - if (equalIndex >= 0) { - if (equalIndex + 1 < uri.length()) { - map.put(keyValuePair.substring(0, equalIndex).trim(), keyValuePair.substring(equalIndex + 1).trim()); - } else { - map.put(keyValuePair.substring(0, equalIndex).trim(), ""); - } - } else { - map.put(keyValuePair.trim(), ""); - } - } - } - } - - return map; - } - - private void 
checkQueryStringMap(final Map queryStringMap) throws XPathException { - for (Map.Entry queryStringEntry : queryStringMap.entrySet()) { - final String key = queryStringEntry.getKey(); - final String value = queryStringEntry.getValue(); - if (key.equals(KEY_CONTENT_TYPE)) { - if (Arrays.stream(VALUE_CONTENT_TYPES).noneMatch(contentTypeValue -> contentTypeValue.equals(value))) { - throw new XPathException(this, ErrorCodes.FODC0004, String.format("Invalid query-string value \"%s\".", queryStringEntry)); - } - } else if (key.equals(KEY_STABLE)) { - if (Arrays.stream(VALUE_STABLES).noneMatch(stableValue -> stableValue.equals(value))) { - throw new XPathException(this, ErrorCodes.FODC0004, String.format("Invalid query-string value \"%s\".", queryStringEntry)); - } - } else if (!key.equals(KEY_MATCH)) { - throw new XPathException(this, ErrorCodes.FODC0004, String.format("Unexpected query string \"%s\".", queryStringEntry)); - } - } - } } diff --git a/exist-core/src/test/java/org/exist/xquery/functions/fn/CollectionFileUriTest.java b/exist-core/src/test/java/org/exist/xquery/functions/fn/CollectionFileUriTest.java new file mode 100644 index 00000000000..abc5dc8dae3 --- /dev/null +++ b/exist-core/src/test/java/org/exist/xquery/functions/fn/CollectionFileUriTest.java @@ -0,0 +1,229 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.functions.fn; + +import org.exist.EXistException; +import org.exist.security.PermissionDeniedException; +import org.exist.storage.BrokerPool; +import org.exist.storage.DBBroker; +import org.exist.test.ExistEmbeddedServer; +import org.exist.xquery.CompiledXQuery; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQuery; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.Sequence; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.Optional; +import java.util.stream.Stream; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Tests for fn:collection() with file: URIs and Saxon-style query string parameters. + *

+ * Creates a temp directory with a mix of files (XML, non-XML, malformed) and verifies + * the {@code select}, {@code match}, {@code content-type}, and {@code stable} parameters. + */ +public class CollectionFileUriTest { + + @ClassRule + public static final ExistEmbeddedServer existEmbeddedServer = new ExistEmbeddedServer(true, true); + + private static Path tempDir; + + @BeforeClass + public static void setUp() throws IOException { + tempDir = Files.createTempDirectory("exist-collection-file-uri-test-"); + + // 5 well-formed XML files + Files.writeString(tempDir.resolve("doc1.xml"), "1"); + Files.writeString(tempDir.resolve("doc2.xml"), "2"); + Files.writeString(tempDir.resolve("doc3.xml"), "3"); + Files.writeString(tempDir.resolve("alpha.xml"), "alpha"); + Files.writeString(tempDir.resolve("beta.xml"), "beta"); + + // Non-XML files (should be excluded by default *.xml glob) + Files.writeString(tempDir.resolve("readme.txt"), "not xml"); + Files.writeString(tempDir.resolve("data.json"), "{\"k\":1}"); + } + + @AfterClass + public static void tearDown() throws IOException { + if (tempDir != null && Files.exists(tempDir)) { + try (final Stream walk = Files.walk(tempDir)) { + walk.sorted(Comparator.reverseOrder()).forEach(p -> { + try { + Files.delete(p); + } catch (final IOException ignored) { + } + }); + } + } + } + + private Sequence runQuery(final String xquery) throws EXistException, PermissionDeniedException, XPathException, IOException { + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + try (final DBBroker broker = pool.get(Optional.of(pool.getSecurityManager().getSystemSubject()))) { + final XQuery xqueryService = pool.getXQueryService(); + final XQueryContext context = new XQueryContext(pool); + final CompiledXQuery compiled = xqueryService.compile(context, xquery); + return xqueryService.execute(broker, compiled, null); + } + } + + private String fileUri() { + return tempDir.toUri().toString(); + } + + @Test + public void 
defaultGlobReturnsAllXml() throws Exception { + // No params: default *.xml glob, returns all 5 XML files + final Sequence result = runQuery("count(fn:collection('" + fileUri() + "'))"); + assertEquals("default glob should match all 5 XML files", "5", result.getStringValue()); + } + + @Test + public void selectGlob() throws Exception { + // ?select=doc*.xml — only doc1, doc2, doc3 + final Sequence result = runQuery("count(fn:collection('" + fileUri() + "?select=doc*.xml'))"); + assertEquals("select=doc*.xml should match 3 files", "3", result.getStringValue()); + } + + @Test + public void matchRegex() throws Exception { + // ?match=^doc[0-9]+\.xml$ — exactly the 3 doc files + final Sequence result = runQuery( + "count(fn:collection('" + fileUri() + "?match=^doc[0-9]+\\.xml$'))"); + assertEquals("match regex should select 3 doc files", "3", result.getStringValue()); + } + + @Test + public void selectAndMatchCombined() throws Exception { + // ?select=*.xml&match=^doc — only doc1/2/3 (excludes alpha, beta) + // Build URI with concat() to avoid the literal & in XQuery string + final Sequence result = runQuery( + "count(fn:collection(concat('" + fileUri() + "?select=*.xml', codepoints-to-string(38), 'match=^doc')))"); + assertEquals("select + match combined", "3", result.getStringValue()); + } + + @Test + public void stableYesGivesAlphabeticalOrder() throws Exception { + // ?stable=yes — files sorted alphabetically: alpha, beta, doc1, doc2, doc3 + final Sequence result = runQuery( + "string-join(\n" + + " for $d in fn:collection('" + fileUri() + "?stable=yes')\n" + + " return tokenize(document-uri($d), '/')[last()],\n" + + " ',')"); + assertEquals("stable=yes should sort alphabetically", + "alpha.xml,beta.xml,doc1.xml,doc2.xml,doc3.xml", result.getStringValue()); + } + + @Test + public void stableIsDefaultYes() throws Exception { + // No stable= param: default is yes (alphabetical) + final Sequence result = runQuery( + "string-join(\n" + + " for $d in fn:collection('" + 
fileUri() + "')\n" + + " return tokenize(document-uri($d), '/')[last()],\n" + + " ',')"); + assertEquals("default ordering should be alphabetical", + "alpha.xml,beta.xml,doc1.xml,doc2.xml,doc3.xml", result.getStringValue()); + } + + @Test + public void contentTypeXml() throws Exception { + // content-type=application/vnd.existdb.document+xml — XML documents only (default for fn:collection) + final Sequence result = runQuery( + "count(fn:collection('" + fileUri() + "?content-type=application/vnd.existdb.document+xml'))"); + assertEquals("xml content-type should match all 5 XML files", "5", result.getStringValue()); + } + + @Test + public void contentTypeBinaryReturnsEmpty() throws Exception { + // fn:collection() doesn't return binary docs — content-type=binary returns nothing + final Sequence result = runQuery( + "count(fn:collection('" + fileUri() + "?content-type=application/vnd.existdb.document+binary'))"); + assertEquals("binary content-type should return 0 documents", "0", result.getStringValue()); + } + + @Test + public void allParametersCombined() throws Exception { + // All four parameters together: select=doc*.xml & match=[12] & content-type=xml & stable=yes + // Build the URI via concat() to avoid the literal & in XQuery string + final String amp = "', codepoints-to-string(38), '"; + final Sequence result = runQuery( + "string-join(\n" + + " for $d in fn:collection(concat('" + fileUri() + "?select=doc*.xml" + amp + + "match=doc[12]" + amp + + "content-type=application/vnd.existdb.document+xml" + amp + + "stable=yes'))\n" + + " return tokenize(document-uri($d), '/')[last()],\n" + + " ',')"); + assertEquals("all params combined should give doc1, doc2 in order", + "doc1.xml,doc2.xml", result.getStringValue()); + } + + @Test + public void invalidQueryParamRaisesError() throws Exception { + // Unknown parameter should raise FODC0004 + try { + runQuery("fn:collection('" + fileUri() + "?bogus=foo')"); + fail("expected FODC0004 for unknown query parameter"); + } 
catch (final XPathException e) { + assertTrue("error should be FODC0004 but was " + e.getErrorCode(), + e.getErrorCode().getErrorQName().getLocalPart().equals("FODC0004")); + } + } + + @Test + public void invalidStableValueRaisesError() throws Exception { + // stable=maybe is invalid + try { + runQuery("fn:collection('" + fileUri() + "?stable=maybe')"); + fail("expected FODC0004 for invalid stable value"); + } catch (final XPathException e) { + assertTrue("error should be FODC0004 but was " + e.getErrorCode(), + e.getErrorCode().getErrorQName().getLocalPart().equals("FODC0004")); + } + } + + @Test + public void invalidContentTypeRaisesError() throws Exception { + try { + runQuery("fn:collection('" + fileUri() + "?content-type=text/plain')"); + fail("expected FODC0004 for invalid content-type value"); + } catch (final XPathException e) { + assertTrue("error should be FODC0004 but was " + e.getErrorCode(), + e.getErrorCode().getErrorQName().getLocalPart().equals("FODC0004")); + } + } +} diff --git a/exist-core/src/test/xquery/xquery3/fnCollectionFileUri.xql b/exist-core/src/test/xquery/xquery3/fnCollectionFileUri.xql new file mode 100644 index 00000000000..2e6dfca36a8 --- /dev/null +++ b/exist-core/src/test/xquery/xquery3/fnCollectionFileUri.xql @@ -0,0 +1,36 @@ +(: + : eXist-db Open Source Native XML Database + : Copyright (C) 2001 The eXist-db Authors + : + : info@exist-db.org + : http://www.exist-db.org + : + : This library is free software; you can redistribute it and/or + : modify it under the terms of the GNU Lesser General Public + : License as published by the Free Software Foundation; either + : version 2.1 of the License, or (at your option) any later version. + : + : This library is distributed in the hope that it will be useful, + : but WITHOUT ANY WARRANTY; without even the implied warranty of + : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + : Lesser General Public License for more details. 
xquery version "3.1";

(:~
 : XQSuite tests for fn:collection() when it is given a file: URI.
 :)
module namespace cfu="http://exist-db.org/xquery/test/collection-file-uri";

declare namespace test="http://exist-db.org/xquery/xqsuite";

(:~
 : A file: URI that names a directory which does not exist must raise FODC0002.
 :)
declare
    %test:assertError("FODC0002")
function cfu:collection-nonexistent-dir() {
    fn:collection("file:///nonexistent-dir-xyz-42-does-not-exist")
};