diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a9266caec..6a9e4c1c8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -34,16 +34,42 @@ jobs: env: POSTGRES_DB: test POSTGRES_PASSWORD: ${{ secrets.DatabasePassword || 'postgres' }} - + mysql: + image: mysql:8.0 + env: + MYSQL_ROOT_PASSWORD: mysql + MYSQL_DATABASE: test_geo_app + MYSQL_USER: pygeoapi + MYSQL_PASSWORD: mysql + ports: + - 3306:3306 + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0 + ports: + - 9200:9200 + - 9300:9300 + env: + discovery.type: single-node + xpack.security.enabled: "false" + ES_JAVA_OPTS: "-Xms512m -Xmx512m" + opensearch: + image: opensearchproject/opensearch:2.18.0 + ports: + - 9209:9200 + env: + discovery.type: single-node + DISABLE_SECURITY_PLUGIN: "true" + OPENSEARCH_JAVA_OPTS: "-Xms512m -Xmx512m" + mongodb: + image: mongo:8.0.4 + ports: + - 27017:27017 + sensorthings: + image: ghcr.io/cgs-earth/sensorthings-action:0.1.2 + volumes: + - /var/run/docker.sock:/var/run/docker.sock + steps: - - name: Pre-pull Docker Images - run: | - docker pull container-registry.oracle.com/database/express:21.3.0-xe & - docker pull appropriate/curl:latest & - docker pull elasticsearch:8.17.0 & - docker pull opensearchproject/opensearch:2.18.0 & - docker pull mongo:8.0.4 & - docker pull postgis/postgis:14-3.2 & - name: Clear up GitHub runner diskspace run: | echo "Space before" @@ -60,43 +86,21 @@ jobs: name: Setup Python ${{ matrix.python-version }} with: python-version: ${{ matrix.python-version }} + - name: Install and run Oracle + run: | + docker run \ + -d \ + --name oracledb \ + -e ORACLE_PWD=oracle \ + -v ${{ github.workspace }}/tests/data/oracle/init-db:/opt/oracle/scripts/startup \ + -p 1521:1521 \ + container-registry.oracle.com/database/express:21.3.0-xe - name: Configure sysctl limits run: | sudo swapoff -a sudo sysctl -w vm.swappiness=1 sudo sysctl -w fs.file-max=262144 sudo sysctl -w vm.max_map_count=262144 - - 
name: "Install and run MySQL 📦" - uses: mirromutth/mysql-action@v1.1 - with: - host port: 3306 - mysql version: '8.0' - mysql database: test_geo_app - mysql root password: mysql # This is a dummy password here; not actually used in prod - mysql user: pygeoapi - mysql password: mysql - - - name: Install and run Elasticsearch 📦 - uses: getong/elasticsearch-action@v1.2 - with: - elasticsearch version: '8.17.0' - host port: 9200 - container port: 9200 - host node port: 9300 - node port: 9300 - discovery type: 'single-node' - - name: Install and run OpenSearch 📦 - uses: esmarkowski/opensearch-github-action@v1.0.0 - with: - version: 2.18.0 - security-disabled: true - port: 9209 - - name: Install and run MongoDB - uses: supercharge/mongodb-github-action@1.12.0 - with: - mongodb-version: '8.0.4' - - name: Install and run SensorThingsAPI - uses: cgs-earth/sensorthings-action@v0.1.2 - name: Install sqlite and gpkg dependencies uses: awalsh128/cache-apt-pkgs-action@v1.4.3 with: @@ -113,9 +117,6 @@ jobs: # with: # packages: gdal-bin libgdal-dev # version: 3.11.3 - - name: Install and run Oracle - run: | - docker run -d --name oracledb -e ORACLE_PWD=oracle -v ${{ github.workspace }}/tests/data/oracle/init-db:/opt/oracle/scripts/startup -p 1521:1521 container-registry.oracle.com/database/express:21.3.0-xe - name: Install requirements 📦 run: | pip3 install setuptools @@ -131,6 +132,7 @@ jobs: pip3 install GDAL==`gdal-config --version` - name: setup test data ⚙️ run: | + python3 tests/load_oracle_data.py python3 tests/load_es_data.py tests/data/ne_110m_populated_places_simple.geojson geonameid python3 tests/load_opensearch_data.py tests/data/ne_110m_populated_places_simple.geojson geonameid python3 tests/load_mongo_data.py tests/data/ne_110m_populated_places_simple.geojson @@ -140,7 +142,6 @@ jobs: psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/postgres_manager_full_structure.backup.sql mysql -h 127.0.0.1 -P 3306 -u root 
-p'mysql' test_geo_app < tests/data/mysql_data.sql docker ps - python3 tests/load_oracle_data.py - name: run API tests ⚙️ run: pytest tests/api --ignore-glob='*_live.py' - name: run Formatter tests ⚙️ diff --git a/.github/workflows/vulnerabilities.yml b/.github/workflows/vulnerabilities.yml index 45af41345..2d6e5171b 100644 --- a/.github/workflows/vulnerabilities.yml +++ b/.github/workflows/vulnerabilities.yml @@ -24,7 +24,7 @@ jobs: - name: Checkout pygeoapi uses: actions/checkout@master - name: Scan vulnerabilities with trivy - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@v0.35.0 with: scan-type: fs exit-code: 1 @@ -36,7 +36,7 @@ jobs: run: | docker buildx build -t ${{ github.repository }}:${{ github.sha }} --platform linux/amd64 --no-cache -f Dockerfile . - name: Scan locally built Docker image for vulnerabilities with trivy - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@v0.35.0 env: TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2 TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1 diff --git a/Dockerfile b/Dockerfile index d8947cbea..833f7a3ca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,7 +34,7 @@ # # ================================================================= -FROM ubuntu:noble-20260113 +FROM ubuntu:noble LABEL maintainer="Just van den Broecke " diff --git a/SECURITY.md b/SECURITY.md index ec9a04f14..87520abc4 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,5 +13,5 @@ The pygeoapi Project Steering Committee (PSC) will release patches for security | Version | Supported | | ------- | ------------------ | -| 0.10.x | :white_check_mark: | -| < 0.10 | :x: | +| 0.2x | :white_check_mark: | +| < 0.20 | :x: | diff --git a/docs/source/administration.rst b/docs/source/administration.rst index 9fe541f3a..c947365b3 100644 --- a/docs/source/administration.rst +++ b/docs/source/administration.rst @@ -32,7 +32,7 @@ To generate the OpenAPI document as JSON, run: .. 
code-block:: bash - pygeoapi openapi generate /path/to/my-pygeoapi-config.yml --format json --output-file /path/to/my-pygeoapi-openapi.yml + pygeoapi openapi generate /path/to/my-pygeoapi-config.yml --format json --output-file /path/to/my-pygeoapi-openapi.json .. note:: Generate as YAML or JSON? If your OpenAPI YAML definition is slow to render as JSON, diff --git a/docs/source/conf.py b/docs/source/conf.py index a3f0ca7ed..1b96113a2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -112,7 +112,7 @@ def __getattr__(cls, name): # built documents. # # The short X.Y version. -version = '0.23.dev0' +version = '0.24.dev0' # The full version, including alpha/beta/rc tags. release = version diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 8cab94f52..fd75e0284 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -243,6 +243,8 @@ default. begin: 2000-10-30T18:24:39Z # start datetime in RFC3339 end: 2007-10-30T08:57:29Z # end datetime in RFC3339 trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian # TRS + resolution: P1D # ISO 8601 duration + default: 2000-10-30T18:24:39Z # default time # additional extents can be added as desired (1..n) foo: url: https://example.org/def # required URL of the extent @@ -290,6 +292,10 @@ default. type: process # REQUIRED (collection, process, or stac-collection) processor: name: HelloWorld # Python path of process definition + # optional, allow for internal HTTP request execution + # if set to True, enables requests to link local ranges and loopback + # default: False + allow_internal_requests: True .. 
seealso:: diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst index 4d7e52e59..0badbf4f6 100644 --- a/docs/source/plugins.rst +++ b/docs/source/plugins.rst @@ -306,11 +306,11 @@ The below template provides a minimal example (let's call the file ``mycooledrda self.covjson = {...} - def get_instances(self): + def instances(self): return ['foo', 'bar'] - def get_instance(self, instance): - return instance in get_instances() + def instance(self, instance): + return instance in instances() def position(self, **kwargs): return self.covjson @@ -320,8 +320,8 @@ The below template provides a minimal example (let's call the file ``mycooledrda For brevity, the ``position`` function returns ``self.covjson`` which is a -dictionary of a CoverageJSON representation. ``get_instances`` returns a list -of instances associated with the collection/plugin, and ``get_instance`` returns +dictionary of a CoverageJSON representation. ``instances`` returns a list +of instances associated with the collection/plugin, and ``instance`` returns a boolean of whether a given instance exists/is valid. EDR query types are subject to the query functions defined in the plugin. In the example above, the plugin implements ``position`` and ``trajectory`` queries, which will be advertised as @@ -410,6 +410,7 @@ Below is a sample process definition as a Python dictionary: 'it back as output. 
Intended to demonstrate a simple ' 'process with a single literal input.', 'jobControlOptions': ['sync-execute', 'async-execute'], # whether the process can be executed in sync or async mode + 'outputTransmission': ['value', 'reference'], # whether the process can return inline data or URL references 'keywords': ['hello world', 'example', 'echo'], # keywords associated with the process 'links': [{ # a list of 1..n # link objects relevant to the process 'type': 'text/html', diff --git a/docs/source/publishing/ogcapi-edr.rst b/docs/source/publishing/ogcapi-edr.rst index faf7fb37d..246a70b76 100644 --- a/docs/source/publishing/ogcapi-edr.rst +++ b/docs/source/publishing/ogcapi-edr.rst @@ -103,7 +103,7 @@ SensorThingsEDR ^^^^^^^^^^^^^^^ The SensorThings API EDR Provider for pygeaopi extends the feature provider to -produce CoverageJSON representations from SensorThings responses repsonses. This provider +produce CoverageJSON representations from SensorThings responses. This provider relies on using the ObservedProperty Entity to create the `parameter-name` set. .. 
code-block:: yaml diff --git a/docs/source/publishing/ogcapi-features.rst b/docs/source/publishing/ogcapi-features.rst index 5646f1a7b..8a38e0b98 100644 --- a/docs/source/publishing/ogcapi-features.rst +++ b/docs/source/publishing/ogcapi-features.rst @@ -585,7 +585,7 @@ To publish a GeoParquet file (with a geometry column) the geopandas package is a - type: feature name: Parquet data: - source: ./tests/data/parquet/random.parquet + source: ./tests/data/parquet/naive/random.parquet id_field: id time_field: time x_field: @@ -595,11 +595,34 @@ To publish a GeoParquet file (with a geometry column) the geopandas package is a - minlat - maxlat -For GeoParquet data, the `x_field` and `y_field` must be specified in the provider definition, +For older versions of parquet data that don't comply to GeoParquet v1.1, the `x_field` and `y_field` must be specified in the provider definition, and they must be arrays of two column names that contain the x and y coordinates of the bounding box of each geometry. If the geometries in the data are all points, the `x_field` and `y_field` can be strings instead of arrays and refer to a single column each. +.. code-block:: yaml + + providers: + - type: feature + name: Parquet + id_field: id + data: + source: ./tests/data/parquet/geoparquet1.1/nyc_subset_overture.parquet + batch_size: 10000 + batch_readahead: 2 + + +For GeoParquet data which complies to spec version 1.1, all geometry metadata will be automatically +detected. + +Note that for any version of parquet, you may optionally specify ``batch_size`` and ``batch_readahead`` in the ``data`` section of the parquet provider config. +``batch_size`` controls how many rows are fetched per batch. Large batch sizes speed up data processing, but add more I/O time, such as increased latency when fetching data from an object store. If not defined it will +default to 20,000 rows. + +``batch_readahead`` controls how many batches are buffered in memory. If not specified it will default to 2. 
+Since OGC API Features payloads are often paginated and fairly small, it generally makes sense to specify a small number to avoid reading too many batches ahead of time, especially when fetching from an object store. + + .. _PostgreSQL: PostgreSQL @@ -627,6 +650,18 @@ Must have PostGIS installed. geom_field: foo_geom count: true # Optional; Default true; Enable/disable count for improved performance. +This can be represented as a connection dictionary or as a connection string as follows: + +.. code-block:: yaml + + providers: + - type: feature + name: PostgreSQL + data: postgresql://postgres:postgres@127.0.0.1:3010/test + id_field: osm_id + table: hotosm_bdi_waterways + geom_field: foo_geom + A number of database connection options can be also configured in the provider in order to adjust properly the sqlalchemy engine client. These are optional and if not specified, the default from the engine will be used. Please see also `SQLAlchemy docs `_. diff --git a/docs/source/publishing/ogcapi-maps.rst b/docs/source/publishing/ogcapi-maps.rst index 6924e3a39..a6b2aca1f 100644 --- a/docs/source/publishing/ogcapi-maps.rst +++ b/docs/source/publishing/ogcapi-maps.rst @@ -113,6 +113,15 @@ required. An optional style name can be defined via `options.style`. name: png mimetype: image/png +.. note:: + According to the `Standard `_, OGC API - Maps + supports a `crs` parameter, expressed as an uri. Currently, this provider supports WGS84 and Web Mercator; for a matter of convenience, they can be expressed in + a number of different ways, other than the uri format. + + - `EPSG:4326` + - `EPSG:3857` + - `4326` + - `3857` Data visualization examples --------------------------- diff --git a/docs/source/pubsub.rst b/docs/source/pubsub.rst index da8570f97..739a50d93 100644 --- a/docs/source/pubsub.rst +++ b/docs/source/pubsub.rst @@ -85,6 +85,11 @@ Brokers The following protocols are supported: +.. note:: + + Pub/Sub client dependencies will vary based on the selected broker. 
``requirements-pubsub.txt`` contains all requirements for supported brokers, as a reference point. + + MQTT ^^^^ @@ -99,6 +104,23 @@ Example directive: channel: messages/a/data # optional hidden: false # default +Kafka +^^^^^ + +Example directive: + +.. code-block:: yaml + + pubsub: + name: Kafka + broker: + url: tcp://localhost:9092 + channel: messages-a-data + # if using authentication: + # sasl_mechanism: PLAIN # default PLAIN + # sasl_security_protocol: SASL_PLAINTEXT # default SASL_PLAINTEXT + hidden: true # default false + HTTP ^^^^ @@ -113,12 +135,16 @@ Example directive: channel: messages-a-data # optional hidden: true # default false +Additional information +---------------------- + .. note:: For any Pub/Sub endpoints requiring authentication, encode the ``url`` value as follows: * ``mqtt://username:password@localhost:1883`` * ``https://username:password@localhost`` + * ``tcp://username:password@localhost:9092`` As with any section of the pygeoapi configuration, environment variables may be used as needed, for example to set username/password information in a URL. If ``pubsub.broker.url`` contains authentication, and @@ -131,5 +157,6 @@ Example directive: If a ``channel`` is not defined, only the relevant OGC API endpoint is used. + .. _`OGC API Publish-Subscribe Workflow - Part 1: Core`: https://docs.ogc.org/DRAFTS/25-030.html .. _`AsyncAPI`: https://www.asyncapi.com diff --git a/pygeoapi/__init__.py b/pygeoapi/__init__.py index 30f235f00..e906bc2c3 100644 --- a/pygeoapi/__init__.py +++ b/pygeoapi/__init__.py @@ -30,7 +30,7 @@ # # ================================================================= -__version__ = '0.23.dev0' +__version__ = '0.24.dev0' import click try: diff --git a/pygeoapi/api/__init__.py b/pygeoapi/api/__init__.py index 9116737a4..33e4b6e55 100644 --- a/pygeoapi/api/__init__.py +++ b/pygeoapi/api/__init__.py @@ -40,7 +40,7 @@ Returns content from plugins and sets responses. 
""" -from collections import ChainMap, OrderedDict +from collections import ChainMap from copy import deepcopy from datetime import datetime from functools import partial @@ -56,21 +56,19 @@ import pytz from pygeoapi import __version__, l10n -from pygeoapi.crs import DEFAULT_STORAGE_CRS, get_supported_crs_list +from pygeoapi.api.collection import gen_collection, OGC_RELTYPES_BASE +from pygeoapi.formats import FORMAT_TYPES, F_GZIP, F_HTML, F_JSON, F_JSONLD from pygeoapi.linked_data import jsonldify, jsonldify_collection from pygeoapi.log import setup_logger from pygeoapi.plugin import load_plugin from pygeoapi.process.manager.base import get_manager -from pygeoapi.provider import ( - filter_providers_by_type, get_provider_by_type, get_provider_default) -from pygeoapi.provider.base import ( - ProviderConnectionError, ProviderGenericError, ProviderTypeError) +from pygeoapi.provider import filter_providers_by_type, get_provider_by_type +from pygeoapi.provider.base import ProviderGenericError, ProviderTypeError from pygeoapi.util import ( - TEMPLATESDIR, UrlPrefetcher, dategetter, - filter_dict_by_key_value, get_api_rules, get_base_url, get_typed_value, - render_j2_template, to_json, get_choice_from_headers, get_from_headers, - get_dataset_formatters + TEMPLATESDIR, UrlPrefetcher, filter_dict_by_key_value, get_api_rules, + get_base_url, get_typed_value, render_j2_template, to_json, + get_choice_from_headers, get_from_headers ) LOGGER = logging.getLogger(__name__) @@ -82,26 +80,6 @@ } CHARSET = ['utf-8'] -F_JSON = 'json' -F_COVERAGEJSON = 'json' -F_HTML = 'html' -F_JSONLD = 'jsonld' -F_GZIP = 'gzip' -F_PNG = 'png' -F_JPEG = 'jpeg' -F_MVT = 'mvt' -F_NETCDF = 'NetCDF' - -#: Formats allowed for ?f= requests (order matters for complex MIME types) -FORMAT_TYPES = OrderedDict(( - (F_HTML, 'text/html'), - (F_JSONLD, 'application/ld+json'), - (F_JSON, 'application/json'), - (F_PNG, 'image/png'), - (F_JPEG, 'image/jpeg'), - (F_MVT, 'application/vnd.mapbox-vector-tile'), - 
(F_NETCDF, 'application/x-netcdf'), -)) #: Locale used for system responses (e.g. exceptions) SYSTEM_LOCALE = l10n.Locale('en', 'US') @@ -115,8 +93,6 @@ 'http://www.opengis.net/spec/ogcapi-common-1/1.0/conf/oas30' ] -OGC_RELTYPES_BASE = 'http://www.opengis.net/def/rel/ogc/1.0' - def all_apis() -> dict: """ @@ -511,7 +487,7 @@ def get_response_headers(self, force_lang: l10n.Locale | None = None, if F_GZIP in FORMAT_TYPES: if force_encoding: headers['Content-Encoding'] = force_encoding - elif F_GZIP in get_from_headers(self._headers, 'accept-encoding'): + elif F_GZIP in get_from_headers(self._headers, 'accept-encoding'): # noqa headers['Content-Encoding'] = F_GZIP return headers @@ -724,11 +700,6 @@ def landing_page(api: API, 'type': FORMAT_TYPES[F_JSON], 'title': l10n.translate('Processes', request.locale), 'href': f"{api.base_url}/processes" - }, { - 'rel': f'{OGC_RELTYPES_BASE}/job-list', - 'type': FORMAT_TYPES[F_JSON], - 'title': l10n.translate('Jobs', request.locale), - 'href': f"{api.base_url}/jobs" }, { 'rel': f'{OGC_RELTYPES_BASE}/tiling-schemes', 'type': FORMAT_TYPES[F_JSON], @@ -754,6 +725,20 @@ def landing_page(api: API, fcm['links'].append(pubsub_link) + if api.manager.is_async: + fcm['links'].append({ + 'rel': f'{OGC_RELTYPES_BASE}/job-list', + 'type': FORMAT_TYPES[F_JSON], + 'title': l10n.translate('Jobs', request.locale), + 'href': f"{api.base_url}/jobs" + }) + fcm['links'].append({ + 'rel': f'{OGC_RELTYPES_BASE}/job-list', + 'type': FORMAT_TYPES[F_HTML], + 'title': l10n.translate('Jobs', request.locale), + 'href': f"{api.base_url}/jobs?f=html" + }) + if api.asyncapi: fcm['links'].append({ 'rel': 'service-doc', @@ -784,6 +769,9 @@ def landing_page(api: API, 'tile'): fcm['tile'] = True + if api.manager.is_async: + fcm['jobs'] = True + if api.pubsub_client is not None and not api.pubsub_client.hidden: fcm['pubsub'] = { 'name': api.pubsub_client.name, @@ -950,9 +938,7 @@ def describe_collections(api: API, request: APIRequest, HTTPStatus.NOT_FOUND, 
headers, request.format, 'NotFound', msg) if dataset is not None: - collections_dict = { - k: v for k, v in collections.items() if k == dataset - } + collections_dict = {dataset: api.config['resources'][dataset]} else: collections_dict = collections @@ -961,433 +947,21 @@ def describe_collections(api: API, request: APIRequest, if v.get('visibility', 'default') == 'hidden': LOGGER.debug(f'Skipping hidden layer: {k}') continue - collection_data = get_provider_default(v['providers']) - collection_data_type = collection_data['type'] - - collection_data_format = None - - if 'format' in collection_data: - collection_data_format = collection_data['format'] - - is_vector_tile = (collection_data_type == 'tile' and - collection_data_format['name'] not - in [F_PNG, F_JPEG]) - - collection = { - 'id': k, - 'title': l10n.translate(v['title'], request.locale), - 'description': l10n.translate(v['description'], request.locale), # noqa - 'keywords': l10n.translate(v['keywords'], request.locale), - 'links': [] - } - - extents = deepcopy(v['extents']) - - bbox = extents['spatial']['bbox'] - LOGGER.debug('Setting spatial extents from configuration') - # The output should be an array of bbox, so if the user only - # provided a single bbox, wrap it in a array. 
- if not isinstance(bbox[0], list): - bbox = [bbox] - collection['extent'] = { - 'spatial': { - 'bbox': bbox - } - } - if 'crs' in extents['spatial']: - collection['extent']['spatial']['crs'] = \ - extents['spatial']['crs'] - - t_ext = extents.get('temporal', {}) - if t_ext: - LOGGER.debug('Setting temporal extents from configuration') - begins = dategetter('begin', t_ext) - ends = dategetter('end', t_ext) - collection['extent']['temporal'] = { - 'interval': [[begins, ends]] - } - if 'trs' in t_ext: - collection['extent']['temporal']['trs'] = t_ext['trs'] - - _ = extents.pop('spatial', None) - _ = extents.pop('temporal', None) - - for ek, ev in extents.items(): - LOGGER.debug(f'Adding extent {ek}') - collection['extent'][ek] = { - 'definition': ev['url'], - 'interval': [ev['range']] - } - if 'units' in ev: - collection['extent'][ek]['unit'] = ev['units'] - - if 'values' in ev: - collection['extent'][ek]['grid'] = { - 'cellsCount': len(ev['values']), - 'coordinates': ev['values'] - } - - LOGGER.debug('Processing configured collection links') - for link in l10n.translate(v.get('links', []), request.locale): - lnk = { - 'type': link['type'], - 'rel': link['rel'], - 'title': l10n.translate(link['title'], request.locale), - 'href': l10n.translate(link['href'], request.locale), - } - if 'hreflang' in link: - lnk['hreflang'] = l10n.translate( - link['hreflang'], request.locale) - content_length = link.get('length', 0) - - if lnk['rel'] == 'enclosure' and content_length == 0: - # Issue HEAD request for enclosure links without length - lnk_headers = api.prefetcher.get_headers(lnk['href']) - content_length = int(lnk_headers.get('content-length', 0)) - content_type = lnk_headers.get('content-type', lnk['type']) - if content_length == 0: - # Skip this (broken) link - LOGGER.debug(f"Enclosure {lnk['href']} is invalid") - continue - if content_type != lnk['type']: - # Update content type if different from specified - lnk['type'] = content_type - LOGGER.debug( - f"Fixed media 
type for enclosure {lnk['href']}") - - if content_length > 0: - lnk['length'] = content_length - - collection['links'].append(lnk) - - # TODO: provide translations - LOGGER.debug('Adding JSON and HTML link relations') - collection['links'].append({ - 'type': FORMAT_TYPES[F_JSON], - 'rel': 'root', - 'title': l10n.translate('The landing page of this server as JSON', request.locale), # noqa - 'href': f"{api.base_url}?f={F_JSON}" - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': 'root', - 'title': l10n.translate('The landing page of this server as HTML', request.locale), # noqa - 'href': f"{api.base_url}?f={F_HTML}" - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_JSON], - 'rel': request.get_linkrel(F_JSON), - 'title': l10n.translate('This document as JSON', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}?f={F_JSON}' - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_JSONLD], - 'rel': request.get_linkrel(F_JSONLD), - 'title': l10n.translate('This document as RDF (JSON-LD)', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}?f={F_JSONLD}' - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': request.get_linkrel(F_HTML), - 'title': l10n.translate('This document as HTML', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}?f={F_HTML}' - }) - - if collection_data_type == 'record': - collection['links'].append({ - 'type': FORMAT_TYPES[F_JSON], - 'rel': f'{OGC_RELTYPES_BASE}/ogc-catalog', - 'title': l10n.translate('Record catalogue as JSON', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}?f={F_JSON}' - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': f'{OGC_RELTYPES_BASE}/ogc-catalog', - 'title': l10n.translate('Record catalogue as HTML', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}?f={F_HTML}' - }) - - if collection_data_type in ['feature', 'coverage', 'record']: - 
collection['links'].append({ - 'type': 'application/schema+json', - 'rel': f'{OGC_RELTYPES_BASE}/schema', - 'title': l10n.translate('Schema of collection in JSON', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/schema?f={F_JSON}' # noqa - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': f'{OGC_RELTYPES_BASE}/schema', - 'title': l10n.translate('Schema of collection in HTML', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/schema?f={F_HTML}' # noqa - }) - - if is_vector_tile or collection_data_type in ['feature', 'record']: - # TODO: translate - collection['itemType'] = collection_data_type - LOGGER.debug('Adding feature/record based links') - collection['links'].append({ - 'type': 'application/schema+json', - 'rel': f'{OGC_RELTYPES_BASE}/queryables', - 'title': l10n.translate('Queryables for this collection as JSON', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/queryables?f={F_JSON}' # noqa - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': f'{OGC_RELTYPES_BASE}/queryables', - 'title': l10n.translate('Queryables for this collection as HTML', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/queryables?f={F_HTML}' # noqa - }) - collection['links'].append({ - 'type': 'application/geo+json', - 'rel': 'items', - 'title': l10n.translate('Items as GeoJSON', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/items?f={F_JSON}' # noqa - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_JSONLD], - 'rel': 'items', - 'title': l10n.translate('Items as RDF (GeoJSON-LD)', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/items?f={F_JSONLD}' # noqa - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': 'items', - 'title': l10n.translate('Items as HTML', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/items?f={F_HTML}' # noqa - }) - - for key, value in 
get_dataset_formatters(v).items(): - collection['links'].append({ - 'type': value.mimetype, - 'rel': 'items', - 'title': l10n.translate(f'Items as {key}', request.locale), # noqa - 'href': f'{api.get_collections_url()}/{k}/items?f={value.f}' # noqa - }) - - # OAPIF Part 2 - list supported CRSs and StorageCRS - if collection_data_type in ['edr', 'feature']: - collection['crs'] = get_supported_crs_list(collection_data) - collection['storageCrs'] = collection_data.get('storage_crs', DEFAULT_STORAGE_CRS) # noqa - if 'storage_crs_coordinate_epoch' in collection_data: - collection['storageCrsCoordinateEpoch'] = collection_data.get('storage_crs_coordinate_epoch') # noqa - - elif collection_data_type == 'coverage': - # TODO: translate - LOGGER.debug('Adding coverage based links') - collection['links'].append({ - 'type': 'application/prs.coverage+json', - 'rel': f'{OGC_RELTYPES_BASE}/coverage', - 'title': l10n.translate('Coverage data', request.locale), - 'href': f'{api.get_collections_url()}/{k}/coverage?f={F_JSON}' # noqa - }) - if collection_data_format is not None: - title_ = l10n.translate('Coverage data as', request.locale) # noqa - title_ = f"{title_} {collection_data_format['name']}" - collection['links'].append({ - 'type': collection_data_format['mimetype'], - 'rel': f'{OGC_RELTYPES_BASE}/coverage', - 'title': title_, - 'href': f"{api.get_collections_url()}/{k}/coverage?f={collection_data_format['name']}" # noqa - }) - if dataset is not None: - LOGGER.debug('Creating extended coverage metadata') - try: - provider_def = get_provider_by_type( - api.config['resources'][k]['providers'], - 'coverage') - p = load_plugin('provider', provider_def) - except ProviderConnectionError: - msg = 'connection error (check logs)' - return api.get_exception( - HTTPStatus.INTERNAL_SERVER_ERROR, - headers, request.format, - 'NoApplicableCode', msg) - except ProviderTypeError: - pass - else: - collection['extent']['spatial']['grid'] = [{ - 'cellsCount': p._coverage_properties['width'], 
- 'resolution': p._coverage_properties['resx'] - }, { - 'cellsCount': p._coverage_properties['height'], - 'resolution': p._coverage_properties['resy'] - }] - if 'time_range' in p._coverage_properties: - collection['extent']['temporal'] = { - 'interval': [p._coverage_properties['time_range']] - } - if 'restime' in p._coverage_properties: - collection['extent']['temporal']['grid'] = { - 'resolution': p._coverage_properties['restime'] # noqa - } - if 'uad' in p._coverage_properties: - collection['extent'].update(p._coverage_properties['uad']) # noqa - - try: - tile = get_provider_by_type(v['providers'], 'tile') - p = load_plugin('provider', tile) - except ProviderConnectionError: - msg = 'connection error (check logs)' - return api.get_exception( - HTTPStatus.INTERNAL_SERVER_ERROR, - headers, request.format, - 'NoApplicableCode', msg) - except ProviderTypeError: - tile = None - - if tile: - # TODO: translate - - LOGGER.debug('Adding tile links') - collection['links'].append({ - 'type': FORMAT_TYPES[F_JSON], - 'rel': f'{OGC_RELTYPES_BASE}/tilesets-{p.tile_type}', - 'title': l10n.translate('Tiles as JSON', request.locale), - 'href': f'{api.get_collections_url()}/{k}/tiles?f={F_JSON}' - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': f'{OGC_RELTYPES_BASE}/tilesets-{p.tile_type}', - 'title': l10n.translate('Tiles as HTML', request.locale), - 'href': f'{api.get_collections_url()}/{k}/tiles?f={F_HTML}' - }) try: - map_ = get_provider_by_type(v['providers'], 'map') - p = load_plugin('provider', map_) - except ProviderTypeError: - map_ = None - - if map_: - LOGGER.debug('Adding map links') - - map_mimetype = map_['format']['mimetype'] - map_format = map_['format']['name'] - - title_ = l10n.translate('Map as', request.locale) - title_ = f'{title_} {map_format}' - - collection['links'].append({ - 'type': map_mimetype, - 'rel': f'{OGC_RELTYPES_BASE}/map', - 'title': title_, - 'href': f'{api.get_collections_url()}/{k}/map?f={map_format}' - }) - - if 
p._fields: - schema_reltype = f'{OGC_RELTYPES_BASE}/schema', - schema_links = [s for s in collection['links'] if - schema_reltype in s] - - if not schema_links: - title_ = l10n.translate('Schema of collection in JSON', request.locale) # noqa - collection['links'].append({ - 'type': 'application/schema+json', - 'rel': f'{OGC_RELTYPES_BASE}/schema', - 'title': title_, - 'href': f'{api.get_collections_url()}/{k}/schema?f=json' # noqa - }) - title_ = l10n.translate('Schema of collection in HTML', request.locale) # noqa - collection['links'].append({ - 'type': 'text/html', - 'rel': f'{OGC_RELTYPES_BASE}/schema', - 'title': title_, - 'href': f'{api.get_collections_url()}/{k}/schema?f=html' # noqa - }) + fcm['collections'].append( + gen_collection(api, request, k, request.locale)) + except Exception as err: + LOGGER.warning(f'Error generating collection {k}: {err}') + if dataset is None: + LOGGER.debug('Skipping failed dataset') + else: + return api.get_exception( + HTTPStatus.INTERNAL_SERVER_ERROR, headers, request.format, + 'NoApplicableCode', 'Error generating collection') - try: - edr = get_provider_by_type(v['providers'], 'edr') - p = load_plugin('provider', edr) - except ProviderConnectionError: - msg = 'connection error (check logs)' - return api.get_exception( - HTTPStatus.INTERNAL_SERVER_ERROR, headers, - request.format, 'NoApplicableCode', msg) - except ProviderTypeError: - edr = None - - if edr: - # TODO: translate - LOGGER.debug('Adding EDR links') - collection['data_queries'] = {} - parameters = p.get_fields() - if parameters: - collection['parameter_names'] = {} - for key, value in parameters.items(): - collection['parameter_names'][key] = { - 'id': key, - 'type': 'Parameter', - 'name': value['title'], - 'observedProperty': { - 'label': { - 'id': key, - 'en': value['title'] - }, - }, - 'unit': { - 'label': { - 'en': value['title'] - }, - 'symbol': { - 'value': value['x-ogc-unit'], - 'type': 'http://www.opengis.net/def/uom/UCUM/' # noqa - } - } - } - - 
collection['parameter_names'][key].update({ - 'description': value['description']} - if 'description' in value else {} - ) - - for qt in p.get_query_types(): - data_query = { - 'link': { - 'href': f'{api.get_collections_url()}/{k}/{qt}', - 'rel': 'data', - 'variables': { - 'query_type': qt - } - } - } - - if request.format is not None and request.format == 'json': - data_query['link']['type'] = 'application/vnd.cov+json' - - collection['data_queries'][qt] = data_query - - title1 = l10n.translate('query for this collection as JSON', request.locale) # noqa - title1 = f'{qt} {title1}' - title2 = l10n.translate('query for this collection as HTML', request.locale) # noqa - title2 = f'{qt} {title2}' - - collection['links'].append({ - 'type': 'application/json', - 'rel': 'data', - 'title': title1, - 'href': f'{api.get_collections_url()}/{k}/{qt}?f={F_JSON}' - }) - collection['links'].append({ - 'type': FORMAT_TYPES[F_HTML], - 'rel': 'data', - 'title': title2, - 'href': f'{api.get_collections_url()}/{k}/{qt}?f={F_HTML}' - }) - - for key, value in get_dataset_formatters(v).items(): - title3 = f'{qt} query for this collection as {key}' - collection['links'].append({ - 'type': value.mimetype, - 'rel': 'data', - 'title': title3, - 'href': f'{api.get_collections_url()}/{k}/{qt}?f={value.f}' # noqa - }) - - if dataset is not None and k == dataset: - fcm = collection - break - - fcm['collections'].append(collection) + if dataset is not None: + fcm = fcm['collections'][0] if dataset is None: # TODO: translate diff --git a/pygeoapi/api/admin.py b/pygeoapi/api/admin.py index bf485515b..a971e1f25 100644 --- a/pygeoapi/api/admin.py +++ b/pygeoapi/api/admin.py @@ -3,7 +3,7 @@ # Authors: Tom Kralidis # Benjamin Webb # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2023 Benjamin Webb # # Permission is hereby granted, free of charge, to any person @@ -39,8 +39,9 @@ from jsonpatch import make_patch from jsonschema.exceptions import ValidationError 
-from pygeoapi.api import API, APIRequest, F_HTML +from pygeoapi.api import API, APIRequest from pygeoapi.config import get_config, validate_config +from pygeoapi.formats import F_HTML from pygeoapi.openapi import get_oas from pygeoapi.util import to_json, render_j2_template, yaml_dump diff --git a/pygeoapi/api/collection.py b/pygeoapi/api/collection.py new file mode 100644 index 000000000..524f2d64e --- /dev/null +++ b/pygeoapi/api/collection.py @@ -0,0 +1,477 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# Francesco Bartoli +# Sander Schaminee +# John A Stevenson +# Colin Blackburn +# Ricardo Garcia Silva +# +# Copyright (c) 2026 Tom Kralidis +# Copyright (c) 2026 Francesco Bartoli +# Copyright (c) 2022 John A Stevenson and Colin Blackburn +# Copyright (c) 2023 Ricardo Garcia Silva +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# ================================================================= + +from copy import deepcopy +import logging + +from pygeoapi import l10n +from pygeoapi.formats import (F_JSON, F_JSONLD, F_HTML, F_JPEG, + F_PNG, FORMAT_TYPES) +from pygeoapi.crs import DEFAULT_STORAGE_CRS, get_supported_crs_list +from pygeoapi.plugin import load_plugin +from pygeoapi.provider import get_provider_by_type, get_provider_default +from pygeoapi.provider.base import ProviderConnectionError, ProviderTypeError +from pygeoapi.util import dategetter, get_dataset_formatters + +LOGGER = logging.getLogger(__name__) + +OGC_RELTYPES_BASE = 'http://www.opengis.net/def/rel/ogc/1.0' + + +def gen_collection(api, request, dataset: str, + locale_: str) -> dict: + """ + Generate OGC API Collection description + + :param api: `APIRequest` object + :param dataset: `str` of dataset name + :param locale_: `str` of requested locale + + :returns: `dict` of OGC API Collection description + """ + + config = api.config['resources'][dataset] + + data = { + 'id': dataset, + 'links': [] + } + + collection_data = get_provider_default(config['providers']) + collection_data_type = collection_data['type'] + + collection_data_format = None + + if 'format' in collection_data: + collection_data_format = collection_data['format'] + + is_vector_tile = (collection_data_type == 'tile' and + collection_data_format['name'] not + in [F_PNG, F_JPEG]) + + data.update({ + 'title': l10n.translate(config['title'], locale_), + 'description': l10n.translate(config['description'], locale_), + 'keywords': l10n.translate(config['keywords'], locale_), + }) + + extents = deepcopy(config['extents']) + + bbox = extents['spatial']['bbox'] + LOGGER.debug('Setting spatial extents from configuration') + # The output should be an array of bbox, so if the user only + # provided a single bbox, wrap it in a array. 
+ if not isinstance(bbox[0], list): + bbox = [bbox] + + data['extent'] = { + 'spatial': { + 'bbox': bbox + } + } + + if 'crs' in extents['spatial']: + data['extent']['spatial']['crs'] = extents['spatial']['crs'] + + t_ext = extents.get('temporal', {}) + if t_ext: + LOGGER.debug('Setting temporal extents from configuration') + begins = dategetter('begin', t_ext) + ends = dategetter('end', t_ext) + data['extent']['temporal'] = { + 'interval': [[begins, ends]] + } + if 'trs' in t_ext: + data['extent']['temporal']['trs'] = t_ext['trs'] + if 'resolution' in t_ext: + data['extent']['temporal']['grid'] = { + 'resolution': t_ext['resolution'] + } + if 'default' in t_ext: + data['extent']['temporal']['default'] = t_ext['default'] + + _ = extents.pop('spatial', None) + _ = extents.pop('temporal', None) + + for ek, ev in extents.items(): + LOGGER.debug(f'Adding extent {ek}') + data['extent'][ek] = { + 'definition': ev['url'], + 'interval': [ev['range']] + } + if 'units' in ev: + data['extent'][ek]['unit'] = ev['units'] + + if 'values' in ev: + data['extent'][ek]['grid'] = { + 'cellsCount': len(ev['values']), + 'coordinates': ev['values'] + } + + LOGGER.debug('Processing configured collection links') + for link in l10n.translate(config.get('links', []), locale_): + lnk = { + 'type': link['type'], + 'rel': link['rel'], + 'title': l10n.translate(link['title'], locale_), + 'href': l10n.translate(link['href'], locale_), + } + if 'hreflang' in link: + lnk['hreflang'] = l10n.translate( + link['hreflang'], locale_) + content_length = link.get('length', 0) + + if lnk['rel'] == 'enclosure' and content_length == 0: + # Issue HEAD request for enclosure links without length + lnk_headers = api.prefetcher.get_headers(lnk['href']) + content_length = int(lnk_headers.get('content-length', 0)) + content_type = lnk_headers.get('content-type', lnk['type']) + if content_length == 0: + # Skip this (broken) link + LOGGER.debug(f"Enclosure {lnk['href']} is invalid") + continue + if content_type != 
lnk['type']: + # Update content type if different from specified + lnk['type'] = content_type + LOGGER.debug( + f"Fixed media type for enclosure {lnk['href']}") + + if content_length > 0: + lnk['length'] = content_length + + data['links'].append(lnk) + + # TODO: provide translations + LOGGER.debug('Adding JSON and HTML link relations') + data['links'].extend([{ + 'type': FORMAT_TYPES[F_JSON], + 'rel': 'root', + 'title': l10n.translate('The landing page of this server as JSON', locale_), # noqa + 'href': f"{api.base_url}?f={F_JSON}" + }, { + 'type': FORMAT_TYPES[F_HTML], + 'rel': 'root', + 'title': l10n.translate('The landing page of this server as HTML', locale_), # noqa + 'href': f"{api.base_url}?f={F_HTML}" + }, { + 'type': FORMAT_TYPES[F_JSON], + 'rel': request.get_linkrel(F_JSON), + 'title': l10n.translate('This document as JSON', locale_), + 'href': f'{api.get_collections_url()}/{dataset}?f={F_JSON}' + }, { + 'type': FORMAT_TYPES[F_JSONLD], + 'rel': request.get_linkrel(F_JSONLD), + 'title': l10n.translate('This document as RDF (JSON-LD)', locale_), + 'href': f'{api.get_collections_url()}/{dataset}?f={F_JSONLD}' + }, { + 'type': FORMAT_TYPES[F_HTML], + 'rel': request.get_linkrel(F_HTML), + 'title': l10n.translate('This document as HTML', locale_), + 'href': f'{api.get_collections_url()}/{dataset}?f={F_HTML}' + }]) + + if collection_data_type == 'record': + data['links'].extend([{ + 'type': FORMAT_TYPES[F_JSON], + 'rel': f'{OGC_RELTYPES_BASE}/ogc-catalog', + 'title': l10n.translate('Record catalogue as JSON', locale_), + 'href': f'{api.get_collections_url()}/{dataset}?f={F_JSON}' + }, { + 'type': FORMAT_TYPES[F_HTML], + 'rel': f'{OGC_RELTYPES_BASE}/ogc-catalog', + 'title': l10n.translate('Record catalogue as HTML', locale_), + 'href': f'{api.get_collections_url()}/{dataset}?f={F_HTML}' + }]) + + if collection_data_type in ['feature', 'coverage', 'record']: + data['links'].extend([{ + 'type': 'application/schema+json', + 'rel': f'{OGC_RELTYPES_BASE}/schema', + 
'title': l10n.translate('Schema of collection in JSON', locale_), + 'href': f'{api.get_collections_url()}/{dataset}/schema?f={F_JSON}' + }, { + 'type': FORMAT_TYPES[F_HTML], + 'rel': f'{OGC_RELTYPES_BASE}/schema', + 'title': l10n.translate('Schema of collection in HTML', locale_), + 'href': f'{api.get_collections_url()}/{dataset}/schema?f={F_HTML}' + }]) + + if is_vector_tile or collection_data_type in ['feature', 'record']: + # TODO: translate + data['itemType'] = collection_data_type + LOGGER.debug('Adding feature/record based links') + data['links'].extend([{ + 'type': 'application/schema+json', + 'rel': f'{OGC_RELTYPES_BASE}/queryables', + 'title': l10n.translate('Queryables for this collection as JSON', locale_), # noqa + 'href': f'{api.get_collections_url()}/{dataset}/queryables?f={F_JSON}' # noqa + }, { + 'type': FORMAT_TYPES[F_HTML], + 'rel': f'{OGC_RELTYPES_BASE}/queryables', + 'title': l10n.translate('Queryables for this collection as HTML', locale_), # noqa + 'href': f'{api.get_collections_url()}/{dataset}/queryables?f={F_HTML}' # noqa + }, { + 'type': 'application/geo+json', + 'rel': 'items', + 'title': l10n.translate('Items as GeoJSON', locale_), + 'href': f'{api.get_collections_url()}/{dataset}/items?f={F_JSON}' + }, { + 'type': FORMAT_TYPES[F_JSONLD], + 'rel': 'items', + 'title': l10n.translate('Items as RDF (GeoJSON-LD)', locale_), + 'href': f'{api.get_collections_url()}/{dataset}/items?f={F_JSONLD}' + }, { + 'type': FORMAT_TYPES[F_HTML], + 'rel': 'items', + 'title': l10n.translate('Items as HTML', locale_), # noqa + 'href': f'{api.get_collections_url()}/{dataset}/items?f={F_HTML}' + }]) + + for key, value in get_dataset_formatters(config).items(): + data['links'].append({ + 'type': value.mimetype, + 'rel': 'items', + 'title': l10n.translate(f'Items as {key}', locale_), # noqa + 'href': f'{api.get_collections_url()}/{dataset}/items?f={value.f}' # noqa + }) + + # OAPIF Part 2 - list supported CRSs and StorageCRS + if collection_data_type in ['edr', 
'feature']: + data['crs'] = get_supported_crs_list(collection_data) + data['storageCrs'] = collection_data.get('storage_crs', DEFAULT_STORAGE_CRS) # noqa + if 'storage_crs_coordinate_epoch' in collection_data: + data['storageCrsCoordinateEpoch'] = collection_data.get('storage_crs_coordinate_epoch') # noqa + + elif collection_data_type == 'coverage': + LOGGER.debug('Adding coverage based links') + data['links'].append({ + 'type': 'application/prs.coverage+json', + 'rel': f'{OGC_RELTYPES_BASE}/coverage', + 'title': l10n.translate('Coverage data', locale_), + 'href': f'{api.get_collections_url()}/{dataset}/coverage?f={F_JSON}' # noqa + }) + if collection_data_format is not None: + title_ = l10n.translate('Coverage data as', locale_) + title_ = f"{title_} {collection_data_format['name']}" + data['links'].append({ + 'type': collection_data_format['mimetype'], + 'rel': f'{OGC_RELTYPES_BASE}/coverage', + 'title': title_, + 'href': f"{api.get_collections_url()}/{dataset}/coverage?f={collection_data_format['name']}" # noqa + }) + if dataset is not None: + LOGGER.debug('Creating extended coverage metadata') + try: + provider_def = get_provider_by_type( + api.config['resources'][dataset]['providers'], + 'coverage') + p = load_plugin('provider', provider_def) + except ProviderConnectionError: + raise + except ProviderTypeError: + pass + else: + data['extent']['spatial']['grid'] = [{ + 'cellsCount': p._coverage_properties['width'], + 'resolution': p._coverage_properties['resx'] + }, { + 'cellsCount': p._coverage_properties['height'], + 'resolution': p._coverage_properties['resy'] + }] + if 'time_range' in p._coverage_properties: + data['extent']['temporal'] = { + 'interval': [p._coverage_properties['time_range']] + } + if 'restime' in p._coverage_properties: + data['extent']['temporal']['grid'] = { + 'resolution': p._coverage_properties['restime'] + } + if 'uad' in p._coverage_properties: + data['extent'].update(p._coverage_properties['uad']) + + try: + tile = 
get_provider_by_type(config['providers'], 'tile')
+        p = load_plugin('provider', tile)
+    except ProviderConnectionError:
+        raise
+    except ProviderTypeError:
+        tile = None
+
+    if tile:
+        LOGGER.debug('Adding tile links')
+        data['links'].extend([{
+            'type': FORMAT_TYPES[F_JSON],
+            'rel': f'{OGC_RELTYPES_BASE}/tilesets-{p.tile_type}',
+            'title': l10n.translate('Tiles as JSON', locale_),
+            'href': f'{api.get_collections_url()}/{dataset}/tiles?f={F_JSON}'
+        }, {
+            'type': FORMAT_TYPES[F_HTML],
+            'rel': f'{OGC_RELTYPES_BASE}/tilesets-{p.tile_type}',
+            'title': l10n.translate('Tiles as HTML', locale_),
+            'href': f'{api.get_collections_url()}/{dataset}/tiles?f={F_HTML}'
+        }])
+
+    try:
+        map_ = get_provider_by_type(config['providers'], 'map')
+        p = load_plugin('provider', map_)
+    except ProviderTypeError:
+        map_ = None
+
+    if map_:
+        LOGGER.debug('Adding map links')
+
+        map_mimetype = map_['format']['mimetype']
+        map_format = map_['format']['name']
+
+        title_ = l10n.translate('Map as', locale_)
+        title_ = f'{title_} {map_format}'
+
+        data['links'].append({
+            'type': map_mimetype,
+            'rel': f'{OGC_RELTYPES_BASE}/map',
+            'title': title_,
+            'href': f'{api.get_collections_url()}/{dataset}/map?f={map_format}'
+        })
+
+        if p._fields:
+            schema_reltype = f'{OGC_RELTYPES_BASE}/schema'
+            schema_links = [s for s in data['links'] if
+                            s.get('rel') == schema_reltype]
+
+            if not schema_links:
+                title_ = l10n.translate('Schema of collection in JSON', locale_)  # noqa
+                data['links'].append({
+                    'type': 'application/schema+json',
+                    'rel': f'{OGC_RELTYPES_BASE}/schema',
+                    'title': title_,
+                    'href': f'{api.get_collections_url()}/{dataset}/schema?f=json'  # noqa
+                })
+                title_ = l10n.translate('Schema of collection in HTML', locale_)  # noqa
+                data['links'].append({
+                    'type': 'text/html',
+                    'rel': f'{OGC_RELTYPES_BASE}/schema',
+                    'title': title_,
+                    'href': f'{api.get_collections_url()}/{dataset}/schema?f=html'  # noqa
+                })
+
+    try:
+        edr = get_provider_by_type(config['providers'], 'edr')
+        p = 
load_plugin('provider', edr) + except ProviderConnectionError: + raise + except ProviderTypeError: + edr = None + + if edr: + # TODO: translate + LOGGER.debug('Adding EDR links') + data['data_queries'] = {} + parameters = p.get_fields() + if parameters: + data['parameter_names'] = {} + for key, value in parameters.items(): + p_label = value.get('title') + p_description = value.get('description') + data['parameter_names'][key] = { + 'id': key, + 'type': 'Parameter', + 'observedProperty': { + 'label': { + 'en': p_label + } + }, + 'unit': { + 'label': { + 'en': value['title'] + }, + 'symbol': { + 'value': value['x-ogc-unit'], + 'type': 'http://www.opengis.net/def/uom/UCUM/' + } + } + } + + if p_description is not None: + data['parameter_names'][key]['observedProperty'].update({ + 'description': { + 'en': p_description + } + }) + + for qt in p.get_query_types(): + data_query = { + 'link': { + 'href': f'{api.get_collections_url()}/{dataset}/{qt}', + 'rel': 'data', + 'variables': { + 'query_type': qt + } + } + } + + if request.format is not None and request.format == 'json': + data_query['link']['type'] = 'application/vnd.cov+json' + + data['data_queries'][qt] = data_query + + title1 = l10n.translate('query for this collection as JSON', locale_) # noqa + title1 = f'{qt} {title1}' + title2 = l10n.translate('query for this collection as HTML', locale_) # noqa + title2 = f'{qt} {title2}' + + data['links'].extend([{ + 'type': 'application/json', + 'rel': 'data', + 'title': title1, + 'href': f'{api.get_collections_url()}/{dataset}/{qt}?f={F_JSON}' # noqa + }, { + 'type': FORMAT_TYPES[F_HTML], + 'rel': 'data', + 'title': title2, + 'href': f'{api.get_collections_url()}/{dataset}/{qt}?f={F_HTML}' # noqa + }]) + + for key, value in get_dataset_formatters(config).items(): + title3 = f'{qt} query for this collection as {key}' + data['links'].append({ + 'type': value.mimetype, + 'rel': 'data', + 'title': title3, + 'href': f'{api.get_collections_url()}/{dataset}/{qt}?f={value.f}' # 
noqa + }) + + return data diff --git a/pygeoapi/api/coverages.py b/pygeoapi/api/coverages.py index 327744a01..67755407d 100644 --- a/pygeoapi/api/coverages.py +++ b/pygeoapi/api/coverages.py @@ -43,6 +43,7 @@ from typing import Tuple from pygeoapi import l10n +from pygeoapi.formats import F_JSON from pygeoapi.openapi import get_oas_30_parameters from pygeoapi.plugin import load_plugin from pygeoapi.provider.base import ProviderGenericError, ProviderTypeError @@ -50,7 +51,7 @@ from pygeoapi.util import filter_dict_by_key_value, to_json from . import ( - APIRequest, API, F_JSON, SYSTEM_LOCALE, validate_bbox, validate_datetime, + APIRequest, API, SYSTEM_LOCALE, validate_bbox, validate_datetime, validate_subset ) diff --git a/pygeoapi/api/environmental_data_retrieval.py b/pygeoapi/api/environmental_data_retrieval.py index 7e1ef1f51..700a6fc91 100644 --- a/pygeoapi/api/environmental_data_retrieval.py +++ b/pygeoapi/api/environmental_data_retrieval.py @@ -49,6 +49,7 @@ from pygeoapi import l10n from pygeoapi.api import evaluate_limit +from pygeoapi.formats import F_COVERAGEJSON, F_HTML, F_JSON, F_JSONLD from pygeoapi.formatter.base import FormatterSerializationError from pygeoapi.crs import (create_crs_transform_spec, set_content_crs_header) from pygeoapi.openapi import get_oas_30_parameters @@ -60,8 +61,7 @@ render_j2_template, to_json, filter_dict_by_key_value) -from . import (APIRequest, API, F_COVERAGEJSON, F_HTML, F_JSON, F_JSONLD, - validate_datetime, validate_bbox) +from . 
import APIRequest, API, validate_datetime, validate_bbox
 
 LOGGER = logging.getLogger(__name__)
 
@@ -113,14 +113,14 @@ def get_collection_edr_instances(api: API, request: APIRequest,
 
     if instance_id is not None:
         try:
-            if p.get_instance(instance_id):
+            if p.instance(instance_id):
                 instances = [instance_id]
         except ProviderItemNotFoundError:
             msg = 'Instance not found'
             return api.get_exception(
                 HTTPStatus.NOT_FOUND, headers, request.format,
                 'NotFound', msg)
     else:
-        instances = p.get_instances()
+        instances = p.instances()
 
     for instance in instances:
         instance_dict = {
@@ -281,7 +281,7 @@ def get_collection_edr_query(api: API, request: APIRequest,
             err.http_status_code, headers, request.format,
             err.ogc_exception_code, err.message)
 
-    if instance is not None and not p.get_instance(instance):
+    if instance is not None and not p.instance(instance):
         msg = 'Invalid instance identifier'
         return api.get_exception(
             HTTPStatus.BAD_REQUEST, headers,
@@ -494,8 +494,14 @@ def get_collection_edr_query(api: API, request: APIRequest,
                 HTTPStatus.INTERNAL_SERVER_ERROR, headers,
                 request.format, 'NoApplicableCode', msg)
 
+        headers['Content-Type'] = formatter.mimetype
+
         if formatter.attachment:
-            filename = f'{dataset}.{formatter.extension}'
+            if p.filename is None:
+                filename = f'{dataset}.{formatter.extension}'
+            else:
+                filename = f'{p.filename}'
+
             cd = f'attachment; filename="{filename}"'
             headers['Content-Disposition'] = cd
 
diff --git a/pygeoapi/api/itemtypes.py b/pygeoapi/api/itemtypes.py
index 2aa85a973..27f1c58c9 100644
--- a/pygeoapi/api/itemtypes.py
+++ b/pygeoapi/api/itemtypes.py
@@ -54,6 +54,7 @@
     create_crs_transform_spec, get_supported_crs_list, modify_pygeofilter,
     transform_bbox, set_content_crs_header)
+from pygeoapi.formats import F_JSON, FORMAT_TYPES, F_HTML, F_JSONLD
 from pygeoapi.formatter.base import FormatterSerializationError
 from pygeoapi.linked_data import geojson2jsonld
 from pygeoapi.openapi import get_oas_30_parameters
@@ -66,10 +67,7 @@
 from pygeoapi.util import (to_json,
filter_dict_by_key_value, str2bool, render_j2_template, get_dataset_formatters) -from . import ( - APIRequest, API, SYSTEM_LOCALE, F_JSON, FORMAT_TYPES, F_HTML, F_JSONLD, - validate_bbox, validate_datetime -) +from . import APIRequest, API, SYSTEM_LOCALE, validate_bbox, validate_datetime LOGGER = logging.getLogger(__name__) @@ -617,7 +615,6 @@ def get_collection_items( if offset > 0: prev_link = True - print(request.format) if prev_link: prev = max(0, offset - limit) url = f'{uri}?offset={prev}{serialized_query_params}' diff --git a/pygeoapi/api/maps.py b/pygeoapi/api/maps.py index bcdda4b7a..c22991799 100644 --- a/pygeoapi/api/maps.py +++ b/pygeoapi/api/maps.py @@ -44,6 +44,7 @@ from typing import Tuple from pygeoapi.crs import transform_bbox +from pygeoapi.formats import F_JSON, FORMAT_TYPES from pygeoapi.openapi import get_oas_30_parameters from pygeoapi.plugin import load_plugin from pygeoapi.provider import filter_providers_by_type, get_provider_by_type @@ -52,9 +53,7 @@ ) from pygeoapi.util import to_json, filter_dict_by_key_value -from . import ( - APIRequest, API, F_JSON, FORMAT_TYPES, validate_datetime, validate_subset -) +from . 
import APIRequest, API, validate_datetime, validate_subset LOGGER = logging.getLogger(__name__) @@ -62,9 +61,17 @@ 'http://www.opengis.net/spec/ogcapi-maps-1/1.0/conf/core' ] - DEFAULT_CRS = 'http://www.opengis.net/def/crs/EPSG/0/4326' +CRS_CODES = { + '4326': 'http://www.opengis.net/def/crs/EPSG/0/4326', + '3857': 'http://www.opengis.net/def/crs/EPSG/0/3857', + 'http://www.opengis.net/def/crs/EPSG/0/4326': 'http://www.opengis.net/def/crs/EPSG/0/4326', # noqa + 'http://www.opengis.net/def/crs/EPSG/0/3857': 'http://www.opengis.net/def/crs/EPSG/0/3857', # noqa + 'EPSG:4326': 'http://www.opengis.net/def/crs/EPSG/0/4326', + 'EPSG:3857': 'http://www.opengis.net/def/crs/EPSG/0/3857' +} + def get_collection_map(api: API, request: APIRequest, dataset: str, style: str | None = None @@ -107,10 +114,10 @@ def get_collection_map(api: API, request: APIRequest, query_args['format_'] = request.params.get('f', 'png') query_args['style'] = style - query_args['crs'] = collection_def.get('crs', DEFAULT_CRS) - query_args['bbox_crs'] = request.params.get( - 'bbox-crs', DEFAULT_CRS - ) + query_args['crs'] = CRS_CODES[request.params.get( + 'crs', collection_def.get('crs', DEFAULT_CRS))] + query_args['bbox_crs'] = CRS_CODES[request.params.get( + 'bbox-crs', collection_def.get('crs', DEFAULT_CRS))] query_args['transparent'] = request.params.get('transparent', True) try: @@ -152,6 +159,7 @@ def get_collection_map(api: API, request: APIRequest, return headers, HTTPStatus.BAD_REQUEST, to_json( exception, api.pretty_print) + # the transformer function expects the crs to be in a uri format if query_args['bbox_crs'] != query_args['crs']: LOGGER.debug(f'Reprojecting bbox CRS: {query_args["crs"]}') bbox = transform_bbox(bbox, query_args['bbox_crs'], query_args['crs']) diff --git a/pygeoapi/api/processes.py b/pygeoapi/api/processes.py index b51ff8531..f2a4eb588 100644 --- a/pygeoapi/api/processes.py +++ b/pygeoapi/api/processes.py @@ -49,6 +49,7 @@ from typing import Tuple from pygeoapi import l10n 
+from pygeoapi.formats import FORMAT_TYPES, F_HTML, F_JSON, F_JSONLD from pygeoapi.api import evaluate_limit from pygeoapi.api.pubsub import publish_message from pygeoapi.process.base import ( @@ -61,9 +62,7 @@ json_serial, render_j2_template, JobStatus, RequestedProcessExecutionMode, to_json, DATETIME_FORMAT) -from . import ( - APIRequest, API, SYSTEM_LOCALE, F_JSON, FORMAT_TYPES, F_HTML, F_JSONLD, -) +from . import APIRequest, API, SYSTEM_LOCALE LOGGER = logging.getLogger(__name__) @@ -131,14 +130,18 @@ def describe_processes(api: API, request: APIRequest, p2.pop('outputs') p2.pop('example', None) - p2['jobControlOptions'] = ['sync-execute'] - if api.manager.is_async: + jco = p.metadata.get('jobControlOptions', ['sync-execute']) + p2['jobControlOptions'] = jco + + if api.manager.is_async and 'async-execute' not in jco: + LOGGER.debug('Adding async capability') p2['jobControlOptions'].append('async-execute') - p2['outputTransmission'] = ['value'] + p2['outputTransmission'] = p.metadata.get( + 'outputTransmission', ['value']) + p2['links'] = p2.get('links', []) - jobs_url = f"{api.base_url}/jobs" process_url = f"{api.base_url}/processes/{key}" # TODO translation support @@ -160,23 +163,22 @@ def describe_processes(api: API, request: APIRequest, } p2['links'].append(link) - link = { - 'type': FORMAT_TYPES[F_HTML], - 'rel': 'http://www.opengis.net/def/rel/ogc/1.0/job-list', - 'href': f'{jobs_url}?f={F_HTML}', - 'title': l10n.translate('Jobs list as HTML', request.locale), # noqa - 'hreflang': api.default_locale - } - p2['links'].append(link) - - link = { - 'type': FORMAT_TYPES[F_JSON], - 'rel': 'http://www.opengis.net/def/rel/ogc/1.0/job-list', - 'href': f'{jobs_url}?f={F_JSON}', - 'title': l10n.translate('Jobs list as JSON', request.locale), # noqa - 'hreflang': api.default_locale - } - p2['links'].append(link) + if api.manager.is_async: + jobs_url = f"{api.base_url}/jobs" + p2['links'].append({ + 'type': FORMAT_TYPES[F_HTML], + 'rel': 
'http://www.opengis.net/def/rel/ogc/1.0/job-list', + 'href': f'{jobs_url}?f={F_HTML}', + 'title': l10n.translate('Jobs list as HTML', request.locale), # noqa + 'hreflang': api.default_locale + }) + p2['links'].append({ + 'type': FORMAT_TYPES[F_JSON], + 'rel': 'http://www.opengis.net/def/rel/ogc/1.0/job-list', + 'href': f'{jobs_url}?f={F_JSON}', + 'title': l10n.translate('Jobs list as JSON', request.locale), # noqa + 'hreflang': api.default_locale + }) link = { 'type': FORMAT_TYPES[F_JSON], @@ -325,27 +327,12 @@ def get_jobs(api: API, request: APIRequest, job_result_url = f"{api.base_url}/jobs/{job_['identifier']}/results" # noqa job2['links'] = [{ - 'href': f'{job_result_url}?f={F_HTML}', - 'rel': 'http://www.opengis.net/def/rel/ogc/1.0/results', - 'type': FORMAT_TYPES[F_HTML], - 'title': l10n.translate(f'Results of job as HTML', request.locale), # noqa - }, { - 'href': f'{job_result_url}?f={F_JSON}', + 'href': job_result_url, 'rel': 'http://www.opengis.net/def/rel/ogc/1.0/results', - 'type': FORMAT_TYPES[F_JSON], - 'title': l10n.translate(f'Results of job as JSON', request.locale), # noqa + 'type': job_['mimetype'], + 'title': f"Results of job {job_id} as {job_['mimetype']}" }] - if job_['mimetype'] not in (FORMAT_TYPES[F_JSON], - FORMAT_TYPES[F_HTML]): - - job2['links'].append({ - 'href': job_result_url, - 'rel': 'http://www.opengis.net/def/rel/ogc/1.0/results', # noqa - 'type': job_['mimetype'], - 'title': f"Results of job {job_id} as {job_['mimetype']}" # noqa - }) - serialized_jobs['jobs'].append(job2) serialized_query_params = '' @@ -526,7 +513,10 @@ def execute_process(api: API, request: APIRequest, pretty_print_ = False response2 = to_json(response, pretty_print_) else: + pretty_print_ = False response2 = response + if isinstance(response, (list, dict)): + response2 = to_json(response, pretty_print_) if (headers.get('Preference-Applied', '') == RequestedProcessExecutionMode.respond_async.value): # noqa LOGGER.debug('Asynchronous mode detected, returning 
statusInfo') @@ -720,11 +710,11 @@ def get_oas_30(cfg: dict, locale: str 'externalDocs': {} } for link in p.metadata.get('links', []): - if link['type'] == 'information': + if link.get('rel', '') == 'information': translated_link = l10n.translate(link, locale) tag['externalDocs']['description'] = translated_link[ - 'type'] - tag['externalDocs']['url'] = translated_link['url'] + 'rel'] + tag['externalDocs']['url'] = translated_link['href'] break if len(tag['externalDocs']) == 0: del tag['externalDocs'] @@ -760,7 +750,7 @@ def get_oas_30(cfg: dict, locale: str 'description': 'Indicates client preferences, including whether the client is capable of asynchronous processing.', # noqa 'schema': { 'type': 'string', - 'enum': ['respond-async'] + 'enum': [] } }], 'responses': { @@ -784,6 +774,12 @@ def get_oas_30(cfg: dict, locale: str } } + jco = p.metadata.get('jobControlOptions', ['sync-execute']) + if 'sync-execute' in jco: + paths[f'{process_name_path}/execution']['post']['parameters'][0]['schema']['enum'].append('respond-sync') # noqa + if 'async-execute' in jco: + paths[f'{process_name_path}/execution']['post']['parameters'][0]['schema']['enum'].append('respond-async') # noqa + try: first_key = list(p.metadata['outputs'])[0] p_output = p.metadata['outputs'][first_key] @@ -815,68 +811,73 @@ def get_oas_30(cfg: dict, locale: str } } - paths['/jobs'] = { - 'get': { - 'summary': 'Retrieve jobs list', - 'description': 'Retrieve a list of jobs', - 'tags': ['jobs'], - 'operationId': 'getJobs', - 'responses': { - '200': {'$ref': '#/components/responses/200'}, - '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa - 'default': {'$ref': '#/components/responses/default'} + tag_objects = [{'name': 'processes'}] + + if process_manager.is_async: + paths['/jobs'] = { + 'get': { + 'summary': 'Retrieve jobs list', + 'description': 'Retrieve a list of jobs', + 'tags': ['jobs'], + 'operationId': 'getJobs', + 'responses': { + '200': {'$ref': 
'#/components/responses/200'}, + '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa + 'default': {'$ref': '#/components/responses/default'} + } } } - } - paths['/jobs/{jobId}'] = { - 'get': { - 'summary': 'Retrieve job details', - 'description': 'Retrieve job details', - 'tags': ['jobs'], - 'parameters': [ - name_in_path, - {'$ref': '#/components/parameters/f'} - ], - 'operationId': 'getJob', - 'responses': { - '200': {'$ref': '#/components/responses/200'}, - '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa - 'default': {'$ref': '#/components/responses/default'} - } - }, - 'delete': { - 'summary': 'Cancel / delete job', - 'description': 'Cancel / delete job', - 'tags': ['jobs'], - 'parameters': [ - name_in_path - ], - 'operationId': 'deleteJob', - 'responses': { - '204': {'$ref': '#/components/responses/204'}, - '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa - 'default': {'$ref': '#/components/responses/default'} - } - }, - } + paths['/jobs/{jobId}'] = { + 'get': { + 'summary': 'Retrieve job details', + 'description': 'Retrieve job details', + 'tags': ['jobs'], + 'parameters': [ + name_in_path, + {'$ref': '#/components/parameters/f'} + ], + 'operationId': 'getJob', + 'responses': { + '200': {'$ref': '#/components/responses/200'}, + '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa + 'default': {'$ref': '#/components/responses/default'} + } + }, + 'delete': { + 'summary': 'Cancel / delete job', + 'description': 'Cancel / delete job', + 'tags': ['jobs'], + 'parameters': [ + name_in_path + ], + 'operationId': 'deleteJob', + 'responses': { + '204': {'$ref': '#/components/responses/204'}, + '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa + 'default': {'$ref': '#/components/responses/default'} + } + }, + } - paths['/jobs/{jobId}/results'] = { - 'get': { - 'summary': 'Retrieve job results', - 'description': 'Retrieve job results', - 
'tags': ['jobs'], - 'parameters': [ - name_in_path, - {'$ref': '#/components/parameters/f'} - ], - 'operationId': 'getJobResults', - 'responses': { - '200': {'$ref': '#/components/responses/200'}, - '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa - 'default': {'$ref': '#/components/responses/default'} + paths['/jobs/{jobId}/results'] = { + 'get': { + 'summary': 'Retrieve job results', + 'description': 'Retrieve job results', + 'tags': ['jobs'], + 'parameters': [ + name_in_path, + {'$ref': '#/components/parameters/f'} + ], + 'operationId': 'getJobResults', + 'responses': { + '200': {'$ref': '#/components/responses/200'}, + '404': {'$ref': f"{OPENAPI_YAML['oapip']}/responses/NotFound.yaml"}, # noqa + 'default': {'$ref': '#/components/responses/default'} + } } } - } - return [{'name': 'processes'}, {'name': 'jobs'}], {'paths': paths} + tag_objects.append({'name': 'jobs'}) + + return tag_objects, {'paths': paths} diff --git a/pygeoapi/api/stac.py b/pygeoapi/api/stac.py index a9227da26..ebf86b6e8 100644 --- a/pygeoapi/api/stac.py +++ b/pygeoapi/api/stac.py @@ -8,7 +8,7 @@ # Ricardo Garcia Silva # Bernhard Mallinger # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2025 Francesco Bartoli # Copyright (c) 2022 John A Stevenson and Colin Blackburn # Copyright (c) 2023 Ricardo Garcia Silva @@ -47,6 +47,7 @@ from shapely import from_geojson from pygeoapi import l10n +from pygeoapi.formats import FORMAT_TYPES, F_JSON, F_HTML from pygeoapi import api as ogc_api from pygeoapi.api import itemtypes as itemtypes_api from pygeoapi.plugin import load_plugin @@ -58,7 +59,7 @@ from pygeoapi.util import (filter_dict_by_key_value, get_current_datetime, render_j2_template, to_json) -from . import APIRequest, API, FORMAT_TYPES, F_JSON, F_HTML +from . 
import APIRequest, API LOGGER = logging.getLogger(__name__) diff --git a/pygeoapi/api/tiles.py b/pygeoapi/api/tiles.py index fb8a39dbb..afdde22b1 100644 --- a/pygeoapi/api/tiles.py +++ b/pygeoapi/api/tiles.py @@ -8,7 +8,7 @@ # Ricardo Garcia Silva # Bernhard Mallinger # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2025 Francesco Bartoli # Copyright (c) 2022 John A Stevenson and Colin Blackburn # Copyright (c) 2023 Ricardo Garcia Silva @@ -43,6 +43,7 @@ from typing import Tuple from pygeoapi import l10n +from pygeoapi.formats import FORMAT_TYPES, F_JSON, F_HTML, F_JSONLD from pygeoapi.plugin import load_plugin from pygeoapi.models.provider.base import (TilesMetadataFormat, TileMatrixSetEnum) @@ -54,9 +55,7 @@ from pygeoapi.util import to_json, filter_dict_by_key_value, render_j2_template -from . import ( - APIRequest, API, FORMAT_TYPES, F_JSON, F_HTML, SYSTEM_LOCALE, F_JSONLD -) +from . import APIRequest, API, SYSTEM_LOCALE LOGGER = logging.getLogger(__name__) diff --git a/pygeoapi/crs.py b/pygeoapi/crs.py index 0c2ff7b48..188414fa1 100644 --- a/pygeoapi/crs.py +++ b/pygeoapi/crs.py @@ -278,7 +278,8 @@ def crs_transform_feature(feature: dict, transform_func: Callable): ) -def transform_bbox(bbox: list, from_crs: str, to_crs: str) -> list: +def transform_bbox(bbox: list, from_crs: Union[str, pyproj.CRS], + to_crs: Union[str, pyproj.CRS]) -> list: """ helper function to transform a bounding box (bbox) from a source to a target CRS. CRSs in URI str format. @@ -286,7 +287,7 @@ def transform_bbox(bbox: list, from_crs: str, to_crs: str) -> list: :param bbox: list of coordinates in 'from_crs' projection :param from_crs: CRS to transform from - :param to_crs: CRSto transform to + :param to_crs: CRS to transform to :raises `CRSError`: Error raised if no CRS could be identified from an URI. 
diff --git a/pygeoapi/flask_app.py b/pygeoapi/flask_app.py index 1c5676af0..4800e1fab 100644 --- a/pygeoapi/flask_app.py +++ b/pygeoapi/flask_app.py @@ -399,8 +399,8 @@ def get_collection_tiles_data(collection_id: str | None = None, ) -@BLUEPRINT.route('/collections//map') -@BLUEPRINT.route('/collections//styles//map') +@BLUEPRINT.route('/collections//map') +@BLUEPRINT.route('/collections//styles//map') def collection_map(collection_id: str, style_id: str | None = None): """ OGC API - Maps map render endpoint diff --git a/pygeoapi/formats.py b/pygeoapi/formats.py new file mode 100644 index 000000000..3ad1f481e --- /dev/null +++ b/pygeoapi/formats.py @@ -0,0 +1,51 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2026 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# ================================================================= + +from collections import OrderedDict + +F_JSON = 'json' +F_COVERAGEJSON = 'json' +F_HTML = 'html' +F_JSONLD = 'jsonld' +F_GZIP = 'gzip' +F_PNG = 'png' +F_JPEG = 'jpeg' +F_MVT = 'mvt' +F_NETCDF = 'NetCDF' + +#: Formats allowed for ?f= requests (order matters for complex MIME types) +FORMAT_TYPES = OrderedDict(( + (F_HTML, 'text/html'), + (F_JSONLD, 'application/ld+json'), + (F_JSON, 'application/json'), + (F_PNG, 'image/png'), + (F_JPEG, 'image/jpeg'), + (F_MVT, 'application/vnd.mapbox-vector-tile'), + (F_NETCDF, 'application/x-netcdf'), +)) diff --git a/pygeoapi/formatter/csv_.py b/pygeoapi/formatter/csv_.py index 2dd8c9dfb..33367ace8 100644 --- a/pygeoapi/formatter/csv_.py +++ b/pygeoapi/formatter/csv_.py @@ -31,6 +31,8 @@ import io import logging +from shapely.geometry import shape as geojson_to_geom + from pygeoapi.formatter.base import BaseFormatter, FormatterSerializationError LOGGER = logging.getLogger(__name__) @@ -60,12 +62,30 @@ def write(self, options: dict = {}, data: dict = None) -> str: Generate data in CSV format :param options: CSV formatting options - :param data: dict of GeoJSON data + :param data: dict of data :returns: string representation of format """ + type = data.get('type') or '' + LOGGER.debug(f'Formatting CSV from data type: {type}') + + if 'Feature' in type or 'features' in data: + return self._write_from_geojson(options, data) + elif 'Coverage' in type or 'coverages' in data: + return self._write_from_covjson(options, data) + + def _write_from_geojson( + self, options: dict = {}, data: dict = None, is_point=False + ) -> str: + """ + Generate GeoJSON data in CSV format - is_point = False + :param options: CSV formatting options + :param data: dict of GeoJSON data + :param is_point: whether the features are point geometries + + :returns: string representation of format + """ try: fields = list(data['features'][0]['properties'].keys()) except IndexError: @@ -75,32 
+95,123 @@ def write(self, options: dict = {}, data: dict = None) -> str: if self.geom: LOGGER.debug('Including point geometry') if data['features'][0]['geometry']['type'] == 'Point': + LOGGER.debug('point geometry detected, adding x,y columns') fields.insert(0, 'x') fields.insert(1, 'y') is_point = True else: - # TODO: implement wkt geometry serialization - LOGGER.debug('not a point geometry, skipping') + LOGGER.debug('not a point geometry, adding wkt column') + fields.append('wkt') LOGGER.debug(f'CSV fields: {fields}') + output = io.StringIO() + writer = csv.DictWriter(output, fields, extrasaction='ignore') + writer.writeheader() - try: - output = io.StringIO() - writer = csv.DictWriter(output, fields) - writer.writeheader() + for feature in data['features']: + self._add_feature(writer, feature, is_point) + + return output.getvalue().encode('utf-8') + + def _add_feature( + self, writer: csv.DictWriter, feature: dict, is_point: bool + ) -> None: + """ + Add feature data to CSV writer - for feature in data['features']: - fp = feature['properties'] + :param writer: CSV DictWriter + :param feature: dict of GeoJSON feature + :param is_point: whether the feature is a point geometry + """ + fp = feature['properties'] + try: + if self.geom: if is_point: fp['x'] = feature['geometry']['coordinates'][0] fp['y'] = feature['geometry']['coordinates'][1] - LOGGER.debug(fp) - writer.writerow(fp) - except ValueError as err: + else: + geom = geojson_to_geom(feature['geometry']) + fp['wkt'] = geom.wkt + + LOGGER.debug(f'Writing feature to row: {fp}') + writer.writerow(fp) + except (ValueError, IndexError) as err: LOGGER.error(err) raise FormatterSerializationError('Error writing CSV output') + def _write_from_covjson( + self, options: dict = {}, data: dict = None + ) -> str: + """ + Generate CovJSON data in CSV format + + :param options: CSV formatting options + :param data: dict of CovJSON data + + :returns: string representation of format + """ + LOGGER.debug('Processing CovJSON 
data for CSV output') + units = {} + for p, v in data['parameters'].items(): + unit = v['unit']['symbol'] + if isinstance(unit, dict): + unit = unit.get('value') + + units[p] = unit + + fields = ['parameter', 'datetime', 'value', 'unit', 'x', 'y'] + LOGGER.debug(f'CSV fields: {fields}') + output = io.StringIO() + writer = csv.DictWriter(output, fields) + writer.writeheader() + + if data['type'] == 'Coverage': + is_point = 'point' in data['domain']['domainType'].lower() + self._add_coverage(writer, units, data, is_point) + else: + [ + self._add_coverage(writer, units, coverage, True) + for coverage in data['coverages'] + if 'point' in coverage['domain']['domainType'].lower() + ] return output.getvalue().encode('utf-8') + @staticmethod + def _add_coverage( + writer: csv.DictWriter, units: dict, data: dict, is_point: bool = False + ) -> None: + """ + Add coverage data to CSV writer + + :param writer: CSV DictWriter + :param units: dict of parameter units + :param data: dict of CovJSON coverage data + :param is_point: whether the coverage is a point coverage + """ + + if is_point is False: + LOGGER.warning('Non-point coverages not supported for CSV output') + return + + axes = data['domain']['axes'] + time_range = range(len(axes['t']['values'])) + + try: + [ + writer.writerow({ + 'parameter': parameter, + 'datetime': axes['t']['values'][time_value], + 'value': data['ranges'][parameter]['values'][time_value], + 'unit': units[parameter], + 'x': axes['x']['values'][-1], + 'y': axes['y']['values'][-1] + }) + for parameter in data['ranges'] + for time_value in time_range + ] + except ValueError as err: + LOGGER.error(err) + raise FormatterSerializationError('Error writing CSV output') + def __repr__(self): return f' {self.name}' diff --git a/pygeoapi/plugin.py b/pygeoapi/plugin.py index 19795be15..32292c895 100644 --- a/pygeoapi/plugin.py +++ b/pygeoapi/plugin.py @@ -88,6 +88,7 @@ }, 'pubsub': { 'HTTP': 'pygeoapi.pubsub.http.HTTPPubSubClient', + 'Kafka': 
'pygeoapi.pubsub.kafka.KafkaPubSubClient', 'MQTT': 'pygeoapi.pubsub.mqtt.MQTTPubSubClient' } } diff --git a/pygeoapi/process/base.py b/pygeoapi/process/base.py index 9e2136476..a3dc05279 100644 --- a/pygeoapi/process/base.py +++ b/pygeoapi/process/base.py @@ -3,7 +3,7 @@ # Authors: Tom Kralidis # Francesco Martinelli # -# Copyright (c) 2022 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2024 Francesco Martinelli # # Permission is hereby granted, free of charge, to any person @@ -53,6 +53,8 @@ def __init__(self, processor_def: dict, process_metadata: dict): self.name = processor_def['name'] self.metadata = process_metadata self.supports_outputs = False + self.allow_internal_requests = processor_def.get( + 'allow_internal_requests', False) def set_job_id(self, job_id: str) -> None: """ diff --git a/pygeoapi/process/hello_world.py b/pygeoapi/process/hello_world.py index 4577929aa..6d53b284e 100644 --- a/pygeoapi/process/hello_world.py +++ b/pygeoapi/process/hello_world.py @@ -3,7 +3,7 @@ # Authors: Tom Kralidis # Francesco Martinelli # -# Copyright (c) 2022 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2024 Francesco Martinelli # # Permission is hereby granted, free of charge, to any person @@ -29,6 +29,7 @@ # # ================================================================= +import json import logging from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError @@ -82,6 +83,28 @@ 'minOccurs': 0, 'maxOccurs': 1, 'keywords': ['message'] + }, + 'as_bytes': { + 'title': 'As bytes', + 'description': 'Whether to force return as bytes', + 'schema': { + 'type': 'boolean', + 'default': False + }, + 'minOccurs': 0, + 'maxOccurs': 1, + 'keywords': ['as_bytes'] + }, + 'media_type': { + 'title': 'Media type', + 'description': 'Force a specific media type', + 'schema': { + 'type': 'string', + 'default': 'application/json' + }, + 'minOccurs': 0, + 'maxOccurs': 1, + 'keywords': ['media_type'] } }, 'outputs': { @@ -120,7 +143,7 @@ def 
__init__(self, processor_def): self.supports_outputs = True def execute(self, data, outputs=None): - mimetype = 'application/json' + mimetype = data.get('media_type', 'application/json') name = data.get('name') if name is None: @@ -136,6 +159,9 @@ def execute(self, data, outputs=None): 'value': value } + if data.get('as_bytes', False): + json.dumps(produced_outputs).encode('utf-8') + return mimetype, produced_outputs def __repr__(self): diff --git a/pygeoapi/process/manager/base.py b/pygeoapi/process/manager/base.py index fadb90fbd..bb711f354 100644 --- a/pygeoapi/process/manager/base.py +++ b/pygeoapi/process/manager/base.py @@ -4,7 +4,7 @@ # Ricardo Garcia Silva # Francesco Martinelli # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # (c) 2023 Ricardo Garcia Silva # (c) 2026 Francesco Martinelli # @@ -46,10 +46,12 @@ BaseProcessor, JobNotFoundError, JobResultNotFoundError, + ProcessorExecuteError, UnknownProcessError, ) from pygeoapi.util import ( get_current_datetime, + is_request_allowed, JobStatus, ProcessExecutionMode, RequestedProcessExecutionMode, @@ -105,7 +107,11 @@ def get_processor(self, process_id: str) -> BaseProcessor: except KeyError as err: raise UnknownProcessError('Invalid process identifier') from err else: - return load_plugin('process', process_conf['processor']) + pp = load_plugin('process', process_conf['processor']) + pp.allow_internal_requests = process_conf.get( + 'allow_internal_requests', False) + + return pp def get_jobs(self, status: JobStatus = None, @@ -277,7 +283,8 @@ def _execute_handler_sync(self, p: BaseProcessor, job_id: str, current_status = JobStatus.running jfmt, outputs = p.execute(data_dict, **extra_execute_parameters) - if isinstance(outputs, bytes): + if isinstance(outputs, bytes) and outputs.isascii(): + LOGGER.debug('output is ASCII; decoding utf-8') outputs = outputs.decode('utf-8') if requested_response == RequestedResponse.document.value: @@ -394,13 +401,13 @@ def execute_process( """ job_id = 
str(uuid.uuid1()) - processor = self.get_processor(process_id) - processor.set_job_id(job_id) + self.processor = self.get_processor(process_id) + self.processor.set_job_id(job_id) extra_execute_handler_parameters = { 'requested_response': requested_response } - job_control_options = processor.metadata.get( + job_control_options = self.processor.metadata.get( 'jobControlOptions', []) if execution_mode == RequestedProcessExecutionMode.respond_async: @@ -473,7 +480,7 @@ def execute_process( # TODO: handler's response could also be allowed to include more HTTP # headers mime_type, outputs, status = handler( - processor, + self.processor, job_id, data_dict, requested_outputs, @@ -483,26 +490,37 @@ def execute_process( def _send_in_progress_notification(self, subscriber: Optional[Subscriber]): if subscriber and subscriber.in_progress_uri: - response = requests.post(subscriber.in_progress_uri, json={}) - LOGGER.debug( - f'In progress notification response: {response.status_code}' - ) + self.__do_subscriber_request(subscriber.in_progress_uri) def _send_success_notification( self, subscriber: Optional[Subscriber], outputs: Any ): - if subscriber: - response = requests.post(subscriber.success_uri, json=outputs) - LOGGER.debug( - f'Success notification response: {response.status_code}' - ) + if subscriber and subscriber.success_uri: + self.__do_subscriber_request(subscriber.success_uri, outputs) def _send_failed_notification(self, subscriber: Optional[Subscriber]): if subscriber and subscriber.failed_uri: - response = requests.post(subscriber.failed_uri, json={}) - LOGGER.debug( - f'Failed notification response: {response.status_code}' - ) + self.__do_subscriber_request(subscriber.failed_uri) + + def __do_subscriber_request(self, url: str, data: dict = {}) -> None: + """ + Helper function to execute a subscriber URL via HTTP POST + + :param url: `str` of URL + :param data: `dict` of request payload + + :returns: `None` + """ + + if not is_request_allowed(url, 
self.processor.allow_internal_requests): + msg = 'URL not allowed' + LOGGER.error(f'{msg}: {url}') + raise ProcessorExecuteError(msg) + + response = requests.post(url, json=data) + LOGGER.debug( + f'Response: {response.status_code}' + ) def __repr__(self): return f' {self.name}' diff --git a/pygeoapi/process/manager/mongodb_.py b/pygeoapi/process/manager/mongodb_.py index 44bce6dbe..06e6d909a 100644 --- a/pygeoapi/process/manager/mongodb_.py +++ b/pygeoapi/process/manager/mongodb_.py @@ -1,8 +1,10 @@ # ================================================================= # # Authors: Alexander Pilz +# Tom Kralidis # # Copyright (c) 2023 Alexander Pilz +# Copyright (c) 2026 Alexander Pilz # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -32,11 +34,11 @@ from pymongo import MongoClient -from pygeoapi.api import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.base import ( JobNotFoundError, JobResultNotFoundError, ) +from pygeoapi.formats import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.manager.base import BaseManager LOGGER = logging.getLogger(__name__) diff --git a/pygeoapi/process/manager/postgresql.py b/pygeoapi/process/manager/postgresql.py index bf5033eef..05dc408ee 100644 --- a/pygeoapi/process/manager/postgresql.py +++ b/pygeoapi/process/manager/postgresql.py @@ -1,8 +1,10 @@ # ================================================================= # # Authors: Francesco Martinelli +# Tom Kralidis # # Copyright (c) 2024 Francesco Martinelli +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -46,17 +48,18 @@ from typing import Any, Tuple from sqlalchemy import insert, update, delete -from sqlalchemy.engine import make_url from sqlalchemy.orm import Session -from pygeoapi.api import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.base import ( 
JobNotFoundError, JobResultNotFoundError, ProcessorGenericError ) +from pygeoapi.formats import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.manager.base import BaseManager -from pygeoapi.provider.sql import get_engine, get_table_model +from pygeoapi.provider.sql import ( + get_engine, get_table_model, store_db_parameters +) from pygeoapi.util import JobStatus @@ -66,13 +69,15 @@ class PostgreSQLManager(BaseManager): """PostgreSQL Manager""" + default_port = 5432 + def __init__(self, manager_def: dict): """ Initialize object :param manager_def: manager definition - :returns: `pygeoapi.process.manager.postgresqs.PostgreSQLManager` + :returns: `pygeoapi.process.manager.postgresql.PostgreSQLManager` """ super().__init__(manager_def) @@ -81,30 +86,18 @@ def __init__(self, manager_def: dict): self.supports_subscribing = True self.connection = manager_def['connection'] - try: - self.db_search_path = tuple(self.connection.get('search_path', - ['public'])) - except Exception: - self.db_search_path = ('public',) - - try: - LOGGER.debug('Connecting to database') - if isinstance(self.connection, str): - _url = make_url(self.connection) - self._engine = get_engine( - 'postgresql+psycopg2', - _url.host, - _url.port, - _url.database, - _url.username, - _url.password) - else: - self._engine = get_engine('postgresql+psycopg2', - **self.connection) - except Exception as err: - msg = 'Test connecting to DB failed' - LOGGER.error(f'{msg}: {err}') - raise ProcessorGenericError(msg) + options = manager_def.get('options', {}) + store_db_parameters(self, manager_def['connection'], options) + self._engine = get_engine( + 'postgresql+psycopg2', + self.db_host, + self.db_port, + self.db_name, + self.db_user, + self._db_password, + self.db_conn, + **self.db_options + ) try: LOGGER.debug('Getting table model') diff --git a/pygeoapi/process/manager/tinydb_.py b/pygeoapi/process/manager/tinydb_.py index b04d29a49..c15e9d36a 100644 --- a/pygeoapi/process/manager/tinydb_.py +++ 
b/pygeoapi/process/manager/tinydb_.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2022 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -37,7 +37,7 @@ import tinydb from filelock import FileLock -from pygeoapi.api import FORMAT_TYPES, F_JSON, F_JSONLD +from pygeoapi.formats import FORMAT_TYPES, F_JSON, F_JSONLD from pygeoapi.process.base import ( JobNotFoundError, JobResultNotFoundError, diff --git a/pygeoapi/provider/base_edr.py b/pygeoapi/provider/base_edr.py index 01b1602b6..08c30df9b 100644 --- a/pygeoapi/provider/base_edr.py +++ b/pygeoapi/provider/base_edr.py @@ -78,7 +78,7 @@ def __init_subclass__(cls, **kwargs): 'but requests will be routed to a feature provider' ) - def get_instances(self): + def instances(self): """ Get a list of instance identifiers @@ -87,7 +87,7 @@ def get_instances(self): return NotImplementedError() - def get_instance(self, instance): + def instance(self, instance): """ Validate instance identifier diff --git a/pygeoapi/provider/filesystem.py b/pygeoapi/provider/filesystem.py index db2a824be..f534c1b73 100644 --- a/pygeoapi/provider/filesystem.py +++ b/pygeoapi/provider/filesystem.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2023 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -34,6 +34,7 @@ import os from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError, + ProviderInvalidQueryError, ProviderNotFoundError) from pygeoapi.util import file_modified_iso8601, get_path_basename, url_join @@ -76,9 +77,15 @@ def get_data_path(self, baseurl, urlpath, dirpath): root_link = None child_links = [] - data_path = os.path.join(self.data, dirpath) + if '..' 
in dirpath: + msg = 'Invalid path requested' + LOGGER.error(f'{msg}: {dirpath}') + raise ProviderInvalidQueryError(msg) + data_path = self.data + dirpath + LOGGER.debug(f'Data path: {data_path}') + if '/' not in dirpath: # root root_link = baseurl else: diff --git a/pygeoapi/provider/parquet.py b/pygeoapi/provider/parquet.py index 0f4ab3de1..8413963e0 100644 --- a/pygeoapi/provider/parquet.py +++ b/pygeoapi/provider/parquet.py @@ -1,8 +1,10 @@ # ================================================================= # # Authors: Leo Ghignone +# Colton Loftus # -# Copyright (c) 2024 Leo Ghignone +# Copyright (c) 2026 Leo Ghignone +# Copyright (c) 2026 Colton Loftus # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -36,6 +38,7 @@ import pyarrow import pyarrow.compute as pc import pyarrow.dataset +import pyarrow.types as pat import s3fs from pygeoapi.crs import crs_transform @@ -60,7 +63,41 @@ def arrow_to_pandas_type(arrow_type): return pd_type +def has_geoparquet_bbox_column( + pyarrow_geo_metadata: dict, primary_geometry_column_name: str +) -> bool: + """ + Check if the metadata on the parquet dataset + indicates there is a geoparquet bbox column + + :param pyarrow_geo_metadata: dict serialized version of the 'geo' + key within the pyarrow metadata json + :param primary_geometry_column_name: name of the primary geometry column + where the geometry is stored as specified in the 'geo' metadata + + :returns: bool whether or not the dataset has a geoparquet bbox column + """ + primary_column = pyarrow_geo_metadata.get('primary_column') + if primary_column is None: + return False + + columns = pyarrow_geo_metadata.get('columns') + if columns is None: + return False + + geometry_column_metadata = columns.get(primary_geometry_column_name) + if geometry_column_metadata is None: + return False + + geometry_covering = geometry_column_metadata.get('covering') + if geometry_covering is None: + return 
False + + return geometry_covering.get('bbox') is not None + + class ParquetProvider(BaseProvider): + def __init__(self, provider_def): """ Initialize object @@ -71,7 +108,8 @@ def __init__(self, provider_def): name: Parquet data: source: s3://example.com/parquet_directory/ - + batch_size: 10000 + batch_readahead: 2 id_field: gml_id @@ -84,49 +122,126 @@ def __init__(self, provider_def): # Source url is required self.source = self.data.get('source') + # When iterating over a dataset, the batch size + # controls how many records are read at a time; + # a larger batch size can reduce latency for large/complex + # requests at the cost of more memory usage + # and potentially overfetching; + # More information on batching can be found here: + # https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Dataset.html#pyarrow.dataset.Dataset.scanner # noqa + # This value can be reduced to decrease network transfer + # if fetching data from an object store + self.batch_size = self.data.get('batch_size', 20_000) + + # batch_readahead is the number of batches to prefetch; + # This adds extra memory but can reduce latency for large + # or complicated queries; in an OGC API Features context, + # it generally makes sense to have some buffering but keep it + # low since most responses are small + self.batch_readahead = self.data.get('batch_readahead', 2) if not self.source: - msg = "Need explicit 'source' attr " \ - "in data field of provider config" + msg = 'Need explicit "source" attr in data' \ + ' field of provider config' LOGGER.error(msg) - raise Exception(msg) + raise ProviderGenericError(msg) # Manage AWS S3 sources if self.source.startswith('s3'): self.source = self.source.split('://', 1)[1] self.fs = s3fs.S3FileSystem(default_cache_type='none') else: + # If none, pyarrow will attempt to auto-detect self.fs = None # Build pyarrow dataset pointing to the data - self.ds = pyarrow.dataset.dataset(self.source, filesystem=self.fs) + self.ds: pyarrow.dataset.Dataset = \ + 
pyarrow.dataset.dataset(self.source, filesystem=self.fs) + + if not self.id_field: + LOGGER.info( + 'No "id_field" specified in parquet provider config' + ' will use pandas index as the identifier' + ) + else: + id_type = self.ds.schema.field(self.id_field).type + if ( + pat.is_integer(id_type) + or pat.is_decimal(id_type) + or pat.is_float_value(id_type) + ): + LOGGER.warning( + f'id_field is of type {id_type},' + ' and not numeric; this is harder to query and' + ' may cause slow full scans' + ) LOGGER.debug('Grabbing field information') self.get_fields() # Must be set to visualise queryables - # Column names for bounding box data. - if None in [self.x_field, self.y_field]: + # Get the CRS of the data + if b'geo' in self.ds.schema.metadata: + geo_metadata = json.loads(self.ds.schema.metadata[b'geo']) + + geom_column = geo_metadata['primary_column'] + + if geom_column: + self.has_geometry = True + + # if the CRS is not set default to EPSG:4326, per geoparquet spec + self.crs = geo_metadata['columns'][geom_column].get('crs') \ + or 'OGC:CRS84' + + # self.bbox_filterable indicates whether or not + # we can resolve a bbox request + # against the data, either by using an explicit + # bbox column or by using x_field and y_field + # columns + self.bbox_filterable = \ + has_geoparquet_bbox_column(geo_metadata, geom_column) + if self.bbox_filterable: + # Whether or not the data has the geoparquet + # standardized bbox column + self.has_bbox_column = True + # if there is a bbox column we + # don't need to parse the x_fields and y_fields + # and can just return early + return + else: + self.has_bbox_column = False + else: self.has_geometry = False + self.has_bbox_column = False + + for field_name, field_value in [ + ('x_field', self.x_field), + ('y_field', self.y_field) + ]: + if not field_value: + LOGGER.warning( + f'No geometry for {self.source};' + f'missing {field_name} in parquet provider config' + ) + self.bbox_filterable = False + self.has_bbox_column = False + return 
+ + # If there is not a geoparquet bbox column, + # then we fall back to reading fields for minx, maxx, miny, maxy + # as direct column names; these can be set and use regardless of + # whether or not there is 'geo' metadata + if isinstance(self.x_field, str): + self.minx = self.x_field + self.maxx = self.x_field else: - self.has_geometry = True - if isinstance(self.x_field, str): - self.minx = self.x_field - self.maxx = self.x_field - else: - self.minx, self.maxx = self.x_field + self.minx, self.maxx = self.x_field - if isinstance(self.y_field, str): - self.miny = self.y_field - self.maxy = self.y_field - else: - self.miny, self.maxy = self.y_field - self.bb = [self.minx, self.miny, self.maxx, self.maxy] + if isinstance(self.y_field, str): + self.miny = self.y_field + self.maxy = self.y_field + else: + self.miny, self.maxy = self.y_field - # Get the CRS of the data - geo_metadata = json.loads(self.ds.schema.metadata[b'geo']) - geom_column = geo_metadata['primary_column'] - # if the CRS is not set default to EPSG:4326, per geoparquet spec - self.crs = (geo_metadata['columns'][geom_column].get('crs') - or 'OGC:CRS84') + self.bbox_filterable = True def _read_parquet(self, return_scanner=False, **kwargs): """ @@ -134,7 +249,15 @@ def _read_parquet(self, return_scanner=False, **kwargs): :returns: generator of RecordBatch with the queried values """ - scanner = pyarrow.dataset.Scanner.from_dataset(self.ds, **kwargs) + scanner = self.ds.scanner( + batch_size=self.batch_size, + # default batch readahead is 16 which is generally + # far too high in a server context; we can safely set it + # to 2 which allows for queueing without excessive reads + batch_readahead=self.batch_readahead, + use_threads=True, + **kwargs + ) batches = scanner.to_batches() if return_scanner: return batches, scanner @@ -149,12 +272,19 @@ def get_fields(self): """ if not self._fields: - - for field_name, field_type in zip(self.ds.schema.names, - self.ds.schema.types): + for field_name, field_type in 
zip( + self.ds.schema.names, self.ds.schema.types + ): # Geometry is managed as a special case by pygeoapi if field_name == 'geometry': continue + # if we find the geoparquet bbox column and the + # type is a struct of any type, either double or + # float, then we skip it since it isn't + # meant to be a queryable field, rather just metadata + if field_name == 'bbox' and 'struct' in str(field_type): + self.bbox_filterable = True + continue field_type = str(field_type) converted_type = None @@ -213,28 +343,44 @@ def query( :returns: dict of 0..n GeoJSON features """ - result = None try: - filter = pc.scalar(True) + filter_ = pc.scalar(True) + if bbox: - if self.has_geometry is False: - msg = ( - 'Dataset does not have a geometry field, ' - 'querying by bbox is not supported.' + if not self.has_geometry: + raise ProviderQueryError( + ( + 'Dataset does not have a geometry field, ' + 'querying by bbox is not supported.' + ) + ) + + if not self.bbox_filterable: + raise ProviderQueryError( + ( + 'Dataset does not have a proper bbox metadata, ' + 'querying by bbox is not supported.' 
+ ) ) - raise ProviderQueryError(msg) - LOGGER.debug('processing bbox parameter') - if any(b is None for b in bbox): - msg = 'Dataset does not support bbox filtering' - raise ProviderQueryError(msg) minx, miny, maxx, maxy = [float(b) for b in bbox] - filter = ( - (pc.field(self.minx) > pc.scalar(minx)) - & (pc.field(self.miny) > pc.scalar(miny)) - & (pc.field(self.maxx) < pc.scalar(maxx)) - & (pc.field(self.maxy) < pc.scalar(maxy)) - ) + + if self.has_bbox_column: + # GeoParquet bbox column is a struct + # with xmin, ymin, xmax, ymax + filter_ = filter_ & ( + (pc.field('bbox', 'xmin') >= pc.scalar(minx)) + & (pc.field('bbox', 'ymin') >= pc.scalar(miny)) + & (pc.field('bbox', 'xmax') <= pc.scalar(maxx)) + & (pc.field('bbox', 'ymax') <= pc.scalar(maxy)) + ) + else: + filter_ = ( + (pc.field(self.minx) >= pc.scalar(minx)) + & (pc.field(self.miny) >= pc.scalar(miny)) + & (pc.field(self.maxx) <= pc.scalar(maxx)) + & (pc.field(self.maxy) <= pc.scalar(maxy)) + ) if datetime_ is not None: if self.time_field is None: @@ -248,13 +394,13 @@ def query( begin, end = datetime_.split('/') if begin != '..': begin = isoparse(begin) - filter = filter & (timefield >= begin) + filter_ = filter_ & (timefield >= begin) if end != '..': end = isoparse(end) - filter = filter & (timefield <= end) + filter_ = filter_ & (timefield <= end) else: target_time = isoparse(datetime_) - filter = filter & (timefield == target_time) + filter_ = filter_ & (timefield == target_time) if properties: LOGGER.debug('processing properties') @@ -263,7 +409,7 @@ def query( pd_type = arrow_to_pandas_type(field.type) expr = pc.field(name) == pc.scalar(pd_type(value)) - filter = filter & expr + filter_ = filter_ & expr if len(select_properties) == 0: select_properties = self.ds.schema.names @@ -279,11 +425,11 @@ def query( # Make response based on resulttype specified if resulttype == 'hits': LOGGER.debug('hits only specified') - result = self._response_feature_hits(filter) + return 
self._response_feature_hits(filter_) elif resulttype == 'results': LOGGER.debug('results specified') - result = self._response_feature_collection( - filter, offset, limit, columns=select_properties + return self._response_feature_collection( + filter_, offset, limit, columns=select_properties ) else: LOGGER.error(f'Invalid resulttype: {resulttype}') @@ -298,8 +444,6 @@ def query( LOGGER.error(err) raise ProviderGenericError(err) - return result - @crs_transform def get(self, identifier, **kwargs): """ @@ -309,22 +453,22 @@ def get(self, identifier, **kwargs): :returns: a single feature """ - result = None try: LOGGER.debug(f'Fetching identifier {identifier}') id_type = arrow_to_pandas_type( - self.ds.schema.field(self.id_field).type) + self.ds.schema.field(self.id_field).type + ) batches = self._read_parquet( filter=( - pc.field(self.id_field) == pc.scalar(id_type(identifier)) - ) + pc.field(self.id_field) == pc.scalar(id_type(identifier) + )) ) for batch in batches: if batch.num_rows > 0: - assert ( - batch.num_rows == 1 - ), f'Multiple items found with ID {identifier}' + assert batch.num_rows == 1, ( + f'Multiple items found with ID {identifier}' + ) row = batch.to_pandas() break else: @@ -335,10 +479,14 @@ def get(self, identifier, **kwargs): else: geom = [None] gdf = gpd.GeoDataFrame(row, geometry=geom) + # If there is an id field, set it as index + # instead of the default numeric index + if self.id_field in gdf.columns: + gdf = gdf.set_index(self.id_field, drop=False) LOGGER.debug('results computed') # Grab the collection from geopandas geo_interface - result = gdf.__geo_interface__['features'][0] + return gdf.__geo_interface__['features'][0] except RuntimeError as err: LOGGER.error(err) @@ -353,13 +501,11 @@ def get(self, identifier, **kwargs): LOGGER.error(err) raise ProviderGenericError(err) - return result - def __repr__(self): return f' {self.data}' - def _response_feature_collection(self, filter, offset, limit, - columns=None): + def 
_response_feature_collection(self, filter, offset, + limit, columns=None): """ Assembles output from query as GeoJSON FeatureCollection structure. @@ -426,6 +572,10 @@ def _response_feature_collection(self, filter, offset, limit, geom = gpd.GeoSeries.from_wkb(rp['geometry'], crs=self.crs) gdf = gpd.GeoDataFrame(rp, geometry=geom) + # If there is an id_field in the data, set it as index + # instead of the default numerical index + if self.id_field in gdf.columns: + gdf = gdf.set_index(self.id_field, drop=False) LOGGER.debug('results computed') result = gdf.__geo_interface__ @@ -446,8 +596,11 @@ def _response_feature_hits(self, filter): """ try: - scanner = pyarrow.dataset.Scanner.from_dataset(self.ds, - filter=filter) + scanner = pyarrow.dataset.Scanner.from_dataset( + self.ds, filter=filter, + batch_size=self.batch_size, + batch_readahead=self.batch_readahead + ) return { 'type': 'FeatureCollection', 'numberMatched': scanner.count_rows(), diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index a955f06db..410f57c30 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -39,25 +39,12 @@ # # ================================================================= -# Testing local postgis with docker: -# docker run --name "postgis" \ -# -v postgres_data:/var/lib/postgresql -p 5432:5432 \ -# -e ALLOW_IP_RANGE=0.0.0.0/0 \ -# -e POSTGRES_USER=postgres \ -# -e POSTGRES_PASS=postgres \ -# -e POSTGRES_DBNAME=test \ -# -d -t kartoza/postgis - -# Import dump: -# gunzip < tests/data/hotosm_bdi_waterways.sql.gz | -# psql -U postgres -h 127.0.0.1 -p 5432 test - from copy import deepcopy from datetime import datetime from decimal import Decimal import functools import logging -from typing import Optional +from typing import Optional, Any from geoalchemy2 import Geometry # noqa - this isn't used explicitly but is needed to process Geometry columns from geoalchemy2.functions import ST_MakeEnvelope, ST_Intersects @@ -73,15 +60,17 @@ desc, delete ) -from 
sqlalchemy.engine import URL +from sqlalchemy.engine import URL, Engine from sqlalchemy.exc import ( ConstraintColumnNotFoundError, InvalidRequestError, - OperationalError + OperationalError, + SQLAlchemyError ) from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm import Session, load_only from sqlalchemy.sql.expression import and_ +from sqlalchemy.schema import Table from pygeoapi.crs import get_transform_from_spec, get_srid from pygeoapi.provider.base import ( @@ -135,8 +124,8 @@ def __init__( LOGGER.debug(f'Configured Storage CRS: {self.storage_crs}') # Read table information from database - options = provider_def.get('options', {}) - self._store_db_parameters(provider_def['data'], options) + options = provider_def.get('options', {}) | extra_conn_args + store_db_parameters(self, provider_def['data'], options) self._engine = get_engine( driver_name, self.db_host, @@ -144,13 +133,13 @@ def __init__( self.db_name, self.db_user, self._db_password, - **self.db_options | extra_conn_args + self.db_conn, + **self.db_options ) self.table_model = get_table_model( self.table, self.id_field, self.db_search_path, self._engine ) - LOGGER.debug(f'DB connection: {repr(self._engine.url)}') self.get_fields() def query( @@ -324,8 +313,12 @@ def get(self, identifier, crs_transform_spec=None, **kwargs): # Execute query within self-closing database Session context with Session(self._engine) as session: # Retrieve data from database as feature - item = session.get(self.table_model, identifier) - if item is None: + try: + item = session.get(self.table_model, identifier) + # Ensure that item is not None + assert item is not None + except (AssertionError, SQLAlchemyError) as e: + LOGGER.debug(e, exc_info=True) msg = f'No such item: {self.id_field}={identifier}.' 
raise ProviderItemNotFoundError(msg) crs_transform_out = get_transform_from_spec(crs_transform_spec) @@ -426,22 +419,6 @@ def delete(self, identifier): return result.rowcount > 0 - def _store_db_parameters(self, parameters, options): - self.db_user = parameters.get('user') - self.db_host = parameters.get('host') - self.db_port = parameters.get('port', self.default_port) - self.db_name = parameters.get('dbname') - # db_search_path gets converted to a tuple here in order to ensure it - # is hashable - which allows us to use functools.cache() when - # reflecting the table definition from the DB - self.db_search_path = tuple(parameters.get('search_path', ['public'])) - self._db_password = parameters.get('password') - self.db_options = { - k: v - for k, v in options.items() - if not isinstance(v, dict) - } - def _sqlalchemy_to_feature(self, item, crs_transform_out=None, select_properties=[]): """ @@ -602,6 +579,48 @@ def _select_properties_clause(self, select_properties, skip_geometry): return selected_properties_clause +def store_db_parameters( + self: GenericSQLProvider | Any, + connection_data: str | dict[str, Any], + options: dict[str, str] +) -> None: + """ + Store database connection parameters + + :param self: instance of provider or manager class + :param connection_data: connection string or dict of connection params + :param options: additional connection options + + :returns: None + """ + if isinstance(connection_data, str): + self.db_conn = connection_data + connection_data = {} + else: + self.db_conn = None + # OR + self.db_user = connection_data.get('user') + self.db_host = connection_data.get('host') + self.db_port = connection_data.get('port', self.default_port) + self.db_name = ( + connection_data.get('dbname') or connection_data.get('database') + ) + self.db_query = connection_data.get('query') + self._db_password = connection_data.get('password') + # db_search_path gets converted to a tuple here in order to ensure it + # is hashable - which allows us to use
functools.cache() when + # reflecting the table definition from the DB + self.db_search_path = tuple( + connection_data.get('search_path') or + options.pop('search_path', ['public']) + ) + self.db_options = { + k: v + for k, v in options.items() + if not isinstance(v, dict) + } + + @functools.cache def get_engine( driver_name: str, @@ -610,20 +629,38 @@ def get_engine( database: str, user: str, password: str, + conn_str: Optional[str] = None, **connect_args -): - """Create SQL Alchemy engine.""" - conn_str = URL.create( - drivername=driver_name, - username=user, - password=password, - host=host, - port=int(port), - database=database - ) +) -> Engine: + """ + Get SQL Alchemy engine. + + :param driver_name: database driver name + :param host: database host + :param port: database port + :param database: database name + :param user: database user + :param password: database password + :param conn_str: optional connection URL + :param connect_args: custom connection arguments to pass to create_engine() + + :returns: SQL Alchemy engine + """ + if conn_str is None: + conn_str = URL.create( + drivername=driver_name, + username=user, + password=password, + host=host, + port=int(port), + database=database + ) + engine = create_engine( conn_str, connect_args=connect_args, pool_pre_ping=True ) + + LOGGER.debug(f'Created engine for {repr(engine.url)}.') return engine @@ -632,14 +669,25 @@ def get_table_model( table_name: str, id_field: str, db_search_path: tuple[str], - engine -): - """Reflect table.""" + engine: Engine +) -> Table: + """ + Reflect table using SQLAlchemy Automap. 
+ + :param table_name: name of table to reflect + :param id_field: name of primary key field + :param db_search_path: tuple of database schemas to search for the table + :param engine: SQLAlchemy engine to use for reflection + + :returns: SQLAlchemy model of the reflected table + """ + LOGGER.debug('Reflecting table definition from database') metadata = MetaData() # Look for table in the first schema in the search path schema = db_search_path[0] try: + LOGGER.debug(f'Looking for table {table_name} in schema {schema}') metadata.reflect( bind=engine, schema=schema, only=[table_name], views=True ) @@ -784,3 +832,67 @@ def _get_bbox_filter(self, bbox: list[float]): func.ST_GeomFromText(polygon_wkt), geom_column ) return bbox_filter + + def get(self, identifier, crs_transform_spec=None, **kwargs): + """ + Query the provider for a specific + feature id e.g: /collections/hotosm_bdi_waterways/items/13990765 + + :param identifier: feature id + :param crs_transform_spec: `CrsTransformSpec` instance, optional + + :returns: GeoJSON FeatureCollection + """ + LOGGER.debug(f'Get item by ID: {identifier}') + + # Execute query within self-closing database Session context + with Session(self._engine) as session: + # Retrieve data from database as feature + try: + item = session.get(self.table_model, identifier) + # Ensure that item is not None + assert item is not None + # Ensure returned row has exact match + feature_id = getattr(item, self.id_field) + assert str(feature_id) == identifier + except (AssertionError, SQLAlchemyError) as e: + LOGGER.debug(e, exc_info=True) + msg = f'No such item: {self.id_field}={identifier}.' 
+ raise ProviderItemNotFoundError(msg) + crs_transform_out = get_transform_from_spec(crs_transform_spec) + feature = self._sqlalchemy_to_feature(item, crs_transform_out) + + # Drop non-defined properties + if self.properties: + props = feature['properties'] + dropping_keys = deepcopy(props).keys() + for item in dropping_keys: + if item not in self.properties: + props.pop(item) + + # Add fields for previous and next items + id_field = getattr(self.table_model, self.id_field) + prev_item = ( + session.query(self.table_model) + .order_by(id_field.desc()) + .filter(id_field < feature_id) + .first() + ) + next_item = ( + session.query(self.table_model) + .order_by(id_field.asc()) + .filter(id_field > feature_id) + .first() + ) + feature['prev'] = ( + getattr(prev_item, self.id_field) + if prev_item is not None + else feature_id + ) + feature['next'] = ( + getattr(next_item, self.id_field) + if next_item is not None + else feature_id + ) + + return feature diff --git a/pygeoapi/provider/tinydb_.py b/pygeoapi/provider/tinydb_.py index 5453f70c8..c121af838 100644 --- a/pygeoapi/provider/tinydb_.py +++ b/pygeoapi/provider/tinydb_.py @@ -28,8 +28,11 @@ # ================================================================= import logging -import re # noqa +from functools import reduce +import operator import os +import re +from typing import Union import uuid from dateutil.parser import parse as parse_date @@ -157,9 +160,7 @@ def query(self, offset=0, limit=10, resulttype='results', """ Q = Query() - LOGGER.debug(f'Query initiated: {Q}') - - QUERY = [] + predicates = [] feature_collection = { 'type': 'FeatureCollection', @@ -173,58 +174,60 @@ def query(self, offset=0, limit=10, resulttype='results', if bbox: LOGGER.debug('processing bbox parameter') bbox_as_string = ','.join(str(s) for s in bbox) - QUERY.append(f"Q.geometry.test(bbox_intersects, '{bbox_as_string}')") # noqa + predicates.append(Q.geometry.test(bbox_intersects, bbox_as_string)) if datetime_ is not None: 
LOGGER.debug('processing datetime parameter') if self.time_field is None: LOGGER.error('time_field not enabled for collection') LOGGER.error('Using default time property') - time_field2 = 'time' + time_field2 = Q.time else: LOGGER.error(f'Using properties.{self.time_field}') - time_field2 = f"properties['{self.time_field}']" + time_field2 = getattr(Q.properties, self.time_field) if '/' in datetime_: # envelope LOGGER.debug('detected time range') time_begin, time_end = datetime_.split('/') if time_begin != '..': - QUERY.append(f"(Q.{time_field2}>='{time_begin}')") # noqa + predicates.append(time_field2 >= time_begin) if time_end != '..': - QUERY.append(f"(Q.{time_field2}<='{time_end}')") # noqa + predicates.append(time_field2 <= time_end) else: # time instant LOGGER.debug('detected time instant') - QUERY.append(f"(Q.{time_field2}=='{datetime_}')") # noqa + predicates.append(getattr(Q, time_field2) == datetime_) if properties: LOGGER.debug('processing properties') for prop in properties: - if isinstance(prop[1], str): - value = f"'{prop[1]}'" - else: - value = prop[1] - QUERY.append(f"(Q.properties['{prop[0]}']=={value})") - - QUERY = self._add_search_query(QUERY, q) - - QUERY_STRING = '&'.join(QUERY) - LOGGER.debug(f'QUERY_STRING: {QUERY_STRING}') - SEARCH_STRING = f'self.db.search({QUERY_STRING})' - LOGGER.debug(f'SEARCH_STRING: {SEARCH_STRING}') - - LOGGER.debug('querying database') - if len(QUERY) > 0: - LOGGER.debug(f'running eval on {SEARCH_STRING}') - try: - results = eval(SEARCH_STRING) - except SyntaxError as err: - msg = 'Invalid query' - LOGGER.error(f'{msg}: {err}') - raise ProviderInvalidQueryError(msg) + if prop[0] not in self.fields: + msg = 'Invalid query: invalid property name' + LOGGER.error(msg) + raise ProviderInvalidQueryError(msg) + + predicates.append(getattr(Q.properties, prop[0]) == prop[1]) + + PQ = reduce(operator.and_, predicates) if predicates else None + if q: + SQ = self._add_search_query(Q, q) else: - results = self.db.all() + SQ = 
None + + try: + if PQ and SQ: + results = self.db.search(PQ & SQ) + elif PQ and not SQ: + results = self.db.search(PQ) + elif not PQ and SQ is not None: + results = self.db.search(SQ) + else: + results = self.db.all() + except SyntaxError as err: + msg = 'Invalid query' + LOGGER.error(f'{msg}: {err}') + raise ProviderInvalidQueryError(msg) feature_collection['numberMatched'] = len(results) @@ -355,17 +358,29 @@ def _add_extra_fields(self, json_data: dict) -> dict: return json_data - def _add_search_query(self, query: list, search_term: str = None) -> str: + def _add_search_query(self, search_object, + search_term: str = None) -> Union[str, None]: """ - Helper function to add extra query predicates + Create a search query according to the OGC API - Records specification. + + https://docs.ogc.org/is/20-004r1/20-004r1.html (Listing 14) + + Examples (f is shorthand for Q.properties["_metadata-anytext"]): + +-------------+-----------------------------------+ + | search term | TinyDB search | + +-------------+-----------------------------------+ + | 'aa' | f.search('aa') | + | 'aa,bb' | f.search('aa')|f.search('bb') | + | 'aa,bb cc' | f.search('aa')|f.search('bb +cc') | + +-------------+-----------------------------------+ - :param query: `list` of query predicates - :param search_term: `str` of search term + :param Q: TinyDB search object + :param s: `str` of q parameter value - :returns: `list` of updated query predicates + :returns: `Query` object or `None` """ - return query + return search_object def __repr__(self): return f' {self.data}' @@ -402,7 +417,7 @@ def _add_extra_fields(self, json_data: dict) -> dict: return json_data - def _prepare_q_param_with_spaces(self, s: str) -> str: + def _prepare_q_param_with_spaces(self, Q: Query, s: str) -> str: """ Prepare a search statement for the search term `s`. The term `s` might have spaces. 
@@ -415,12 +430,18 @@ def _prepare_q_param_with_spaces(self, s: str) -> str: | 'aa bb' | f.search('aa +bb') | | ' aa bb ' | f.search('aa +bb') | +---------------+--------------------+ + + :param Q: TinyDB `Query` object + :param s: `str` of q parameter value + + :returns: `Query` object """ - return 'Q.properties["_metadata-anytext"].search("' \ - + ' +'.join(s.split()) \ - + '", flags=re.IGNORECASE)' - def _add_search_query(self, query: list, search_term: str = None) -> str: + return Q.properties["_metadata-anytext"].search( + ' +'.join(s.split()), flags=re.IGNORECASE) + + def _add_search_query(self, search_object, + search_term: str = None) -> Union[str, None]: """ Create a search query according to the OGC API - Records specification. @@ -434,15 +455,22 @@ def _add_search_query(self, query: list, search_term: str = None) -> str: | 'aa,bb' | f.search('aa')|f.search('bb') | | 'aa,bb cc' | f.search('aa')|f.search('bb +cc') | +-------------+-----------------------------------+ + + :param Q: TinyDB search object + :param s: `str` of q parameter value + + :returns: `Query` object or `None` """ + if search_term is not None and len(search_term) > 0: LOGGER.debug('catalogue q= query') terms = [s for s in search_term.split(',') if len(s) > 0] - query.append('|'.join( - [self._prepare_q_param_with_spaces(t) for t in terms] - )) + terms2 = [self._prepare_q_param_with_spaces(search_object, t) + for t in terms] - return query + return reduce(operator.or_, terms2) + else: + return None def __repr__(self): return f' {self.data}' diff --git a/pygeoapi/provider/wms_facade.py b/pygeoapi/provider/wms_facade.py index e96f3c244..467c12aba 100644 --- a/pygeoapi/provider/wms_facade.py +++ b/pygeoapi/provider/wms_facade.py @@ -42,11 +42,12 @@ } CRS_CODES = { - 4326: 'EPSG:4326', 'http://www.opengis.net/def/crs/EPSG/0/4326': 'EPSG:4326', 'http://www.opengis.net/def/crs/EPSG/0/3857': 'EPSG:3857' } +DEFAULT_CRS = 'http://www.opengis.net/def/crs/EPSG/0/4326' + class 
WMSFacadeProvider(BaseProvider): """WMS 1.3.0 provider""" @@ -65,8 +66,8 @@ def __init__(self, provider_def): LOGGER.debug(f'pyproj version: {pyproj.__version__}') def query(self, style=None, bbox=[-180, -90, 180, 90], width=500, - height=300, crs=4326, datetime_=None, transparent=True, - bbox_crs=4326, format_='png', **kwargs): + height=300, crs=DEFAULT_CRS, datetime_=None, transparent=True, + bbox_crs=DEFAULT_CRS, format_='png', **kwargs): """ Generate map @@ -87,7 +88,7 @@ def query(self, style=None, bbox=[-180, -90, 180, 90], width=500, version = self.options.get('version', '1.3.0') - if version == '1.3.0' and CRS_CODES[bbox_crs] == 'EPSG:4326': + if version == '1.3.0' and CRS_CODES.get(bbox_crs) == 'EPSG:4326': bbox = [bbox[1], bbox[0], bbox[3], bbox[2]] bbox2 = ','.join(map(str, bbox)) @@ -100,7 +101,7 @@ def query(self, style=None, bbox=[-180, -90, 180, 90], width=500, 'service': 'WMS', 'request': 'GetMap', 'bbox': bbox2, - crs_param: CRS_CODES[crs], + crs_param: CRS_CODES.get(crs) or 'EPSG:4326', 'layers': self.options['layer'], 'styles': self.options.get('style', 'default'), 'width': width, diff --git a/pygeoapi/pubsub/http.py b/pygeoapi/pubsub/http.py index a07c600ca..c19accc7d 100644 --- a/pygeoapi/pubsub/http.py +++ b/pygeoapi/pubsub/http.py @@ -41,7 +41,7 @@ class HTTPPubSubClient(BasePubSubClient): """HTTP client""" - def __init__(self, broker_url): + def __init__(self, publisher_def): """ Initialize object @@ -50,7 +50,7 @@ def __init__(self, broker_url): :returns: pygeoapi.pubsub.http.HTTPPubSubClient """ - super().__init__(broker_url) + super().__init__(publisher_def) self.name = 'HTTP' self.type = 'http' self.auth = None diff --git a/pygeoapi/pubsub/kafka.py b/pygeoapi/pubsub/kafka.py new file mode 100644 index 000000000..20033dea8 --- /dev/null +++ b/pygeoapi/pubsub/kafka.py @@ -0,0 +1,109 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2026 Tom Kralidis +# +# Permission is 
hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# ================================================================= + +import logging + +from kafka import errors, KafkaProducer + +from pygeoapi.pubsub.base import BasePubSubClient, PubSubClientConnectionError +from pygeoapi.util import to_json + +LOGGER = logging.getLogger(__name__) + + +class KafkaPubSubClient(BasePubSubClient): + """Kafka client""" + + def __init__(self, publisher_def): + """ + Initialize object + + :param publisher_def: provider definition + + :returns: pygeoapi.pubsub.kafka.KafkaPubSubClient + """ + + super().__init__(publisher_def) + self.name = 'Kafka' + self.type = 'kafka' + self.sasl_mechanism = publisher_def.get('sasl.mechanism', 'PLAIN') + self.security_protocol = publisher_def.get('security.protocol', 'SASL_SSL') # noqa + + msg = f'Initializing to broker {self.broker_safe_url} with id {self.client_id}' # noqa + LOGGER.debug(msg) + + def connect(self) -> None: + """ + Connect to an Kafka broker + + :returns: None + """ + + args = { + 'bootstrap_servers': f'{self.broker_url.hostname}:{self.broker_url.port}', # noqa + 'client_id': self.client_id, + 'value_serializer': lambda v: to_json(v).encode('utf-8') + } + if None not in [self.broker_url.username, self.broker_url.password]: + args.update({ + 'security.protocol': self.security_protocol, + 'sasl.mechanism': self.sasl_mechanism, + 'sasl.username': self.broker_url.username, + 'sasl.password': self.broker_url.password + }) + + LOGGER.debug('Creating Kafka producer') + try: + self.producer = KafkaProducer(**args) + except errors.NoBrokersAvailable as err: + raise PubSubClientConnectionError(err) + + def pub(self, channel: str, message: str) -> bool: + """ + Publish a message to a broker/channel + + :param channel: `str` of topic + :param message: `str` of message + + :returns: `bool` of publish result + """ + + LOGGER.debug(f'Publishing to broker {self.broker_safe_url}') + LOGGER.debug(f'Channel: {channel}') + LOGGER.debug(f'Message: {message}') + LOGGER.debug('Sanitizing channel for 
HTTP') + channel = channel.replace('/', '-') + channel = channel.replace(':', '-') + LOGGER.debug(f'Sanitized channel for Kafka: {channel}') + + self.producer.send(channel, value=message) + self.producer.flush() + + def __repr__(self): + return f' {self.broker_safe_url}' diff --git a/pygeoapi/pubsub/mqtt.py b/pygeoapi/pubsub/mqtt.py index 2afd04087..0f88d670b 100644 --- a/pygeoapi/pubsub/mqtt.py +++ b/pygeoapi/pubsub/mqtt.py @@ -39,7 +39,7 @@ class MQTTPubSubClient(BasePubSubClient): """MQTT client""" - def __init__(self, broker_url): + def __init__(self, publisher_def): """ Initialize object @@ -48,7 +48,7 @@ def __init__(self, broker_url): :returns: pycsw.pubsub.mqtt.MQTTPubSubClient """ - super().__init__(broker_url) + super().__init__(publisher_def) self.type = 'mqtt' self.port = self.broker_url.port diff --git a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml index d772f000b..aaee641ec 100644 --- a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml +++ b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml @@ -477,6 +477,12 @@ properties: type: string description: temporal reference system of features default: 'http://www.opengis.net/def/uom/ISO-8601/0/Gregorian' + resolution: + type: string + description: temporal resolution + default: + type: string + description: default time value patternProperties: "^(?!spatial$|temporal$).*": type: object @@ -676,7 +682,11 @@ properties: For custom built plugins, use the import path (e.g. 
`mypackage.provider.MyProvider`) required: - name - required: + allow_internal_requests: + type: boolean + description: whether to allow internal HTTP requests + default: false + required: - type - processor definitions: diff --git a/pygeoapi/starlette_app.py index ffb5b656f..6313cbfb3 100644 --- a/pygeoapi/starlette_app.py +++ b/pygeoapi/starlette_app.py @@ -523,6 +523,11 @@ async def get_collection_edr_query(request: Request, if 'collection_id' in request.path_params: collection_id = request.path_params['collection_id'] + if '/instances/' in collection_id: + tokens = collection_id.split('/instances/') + collection_id = tokens[0] + instance_id = tokens[-1] + if 'instance_id' in request.path_params: instance_id = request.path_params['instance_id'] diff --git a/pygeoapi/templates/collections/collection.html index 57146a506..4a3b0f7f5 100644 --- a/pygeoapi/templates/collections/collection.html +++ b/pygeoapi/templates/collections/collection.html @@ -98,7 +98,7 @@

Parameters

{% for parameter in data['parameter_names'].values() %} {{ parameter['id'] }} - {{ parameter['name'] }} + {{ parameter['observedProperty']['label'].values()|first }}{% if parameter['observedProperty']['description'] %}
{{ parameter['observedProperty']['description'].values()|first }}{% endif %} {{ parameter['unit']['symbol']['value'] }} {% endfor %} diff --git a/pygeoapi/templates/collections/items/index.html b/pygeoapi/templates/collections/items/index.html index 5db56046d..88fa36b51 100644 --- a/pygeoapi/templates/collections/items/index.html +++ b/pygeoapi/templates/collections/items/index.html @@ -52,7 +52,7 @@

{% for l in data['links'] if l.rel == 'collection' %} {{ l['title'] }} {% en
- +
@@ -192,6 +192,7 @@

{% for l in data['links'] if l.rel == 'collection' %} {{ l['title'] }} {% en document.getElementById("q").addEventListener("keydown", function(event) { if (event.key === "Enter") { + event.preventDefault(); submitForm(); } }); @@ -201,19 +202,19 @@

{% for l in data['links'] if l.rel == 'collection' %} {{ l['title'] }} {% en var datetime = []; var q = document.getElementById('q').value; - var datetime_begin = document.getElementById('datetime_begin').value; - var datetime_end = document.getElementById('datetime_end').value; + var datetime_begin = document.getElementById('datetime_begin'); + var datetime_end = document.getElementById('datetime_end'); if (q) { query_string.push('q=' + encodeURIComponent(q)); } - if (datetime_begin !== "") { - datetime.push(datetime_begin + 'T00:00:00Z'); + if (datetime_begin.value !== "") { + datetime.push(datetime_begin.value + 'T00:00:00Z'); } else { datetime.push('..'); } - if (datetime_end !== "") { - datetime.push(datetime_end + 'T23:59:59Z'); + if (datetime_end.value !== "") { + datetime.push(datetime_end.value + 'T23:59:59Z'); } else { datetime.push('..'); } @@ -231,6 +232,8 @@

{% for l in data['links'] if l.rel == 'collection' %} {{ l['title'] }} {% en if (query_string.length > 0) { document.location.href = '{{ data['items_path'] }}' + '?' + query_string.join('&'); } + datetime_begin.disabled = true; + datetime_end.disabled = true; } {% endif %} var map = L.map('items-map').setView([{{ 45 }}, {{ -75 }}], 5); @@ -288,6 +291,9 @@

{% for l in data['links'] if l.rel == 'collection' %} {{ l['title'] }} {% en setRectangle(map.getBounds().pad(-0.95)); } } + + var form = document.getElementById("searchForm"); + form.addEventListener("submit", submitForm); {% endif %} {% endif %} diff --git a/pygeoapi/templates/landing_page.html b/pygeoapi/templates/landing_page.html index 69e601787..11e8d7dfb 100644 --- a/pygeoapi/templates/landing_page.html +++ b/pygeoapi/templates/landing_page.html @@ -75,6 +75,7 @@

{% trans %}Processes{% endtrans %}

{% trans %}View the processes in this service{% endtrans %}

+ {% if data['jobs'] %}

{% trans %}Jobs{% endtrans %}

@@ -82,6 +83,7 @@

{% trans %}Jobs{% endtrans %}

{% endif %} + {% endif %} {% if data['tile'] %}

{% trans %}Tile Matrix Sets{% endtrans %}

diff --git a/pygeoapi/templates/processes/process.html b/pygeoapi/templates/processes/process.html index 66d560771..453ad2207 100644 --- a/pygeoapi/templates/processes/process.html +++ b/pygeoapi/templates/processes/process.html @@ -73,8 +73,10 @@

{% trans %}Execution modes{% endtrans %}

{% if 'sync-execute' in data.jobControlOptions %}
  • {% trans %}Synchronous{% endtrans %}
  • {% endif %} {% if 'async-execute' in data.jobControlOptions %}
  • {% trans %}Asynchronous{% endtrans %}
  • {% endif %} + {% if data['jobs'] %}

    {% trans %}Jobs{% endtrans %}

    {% trans %}Browse jobs{% endtrans %} + {% endif %}

    {% trans %}Links{% endtrans %}

      {% for link in data['links'] %} diff --git a/pygeoapi/util.py b/pygeoapi/util.py index c91a4c3e0..30b29f5d8 100644 --- a/pygeoapi/util.py +++ b/pygeoapi/util.py @@ -36,6 +36,7 @@ from decimal import Decimal from enum import Enum from heapq import heappush +import ipaddress import json import logging import mimetypes @@ -43,6 +44,7 @@ import pathlib from pathlib import Path import re +import socket from typing import Any, IO, Union, List, Optional from urllib.parse import urlparse from urllib.request import urlopen @@ -261,8 +263,15 @@ def to_json(dict_: dict, pretty: bool = False) -> str: else: indent = None - return json.dumps(dict_, default=json_serial, indent=indent, - separators=(',', ':')) + LOGGER.debug('Dumping JSON') + json_dump = json.dumps(dict_, default=json_serial, indent=indent, + separators=(',', ':')) + + LOGGER.debug('Escaping < and >') + json_dump = json_dump.replace('<', '<') + json_dump = json_dump.replace('>', '>') + + return json_dump def format_datetime(value: str, format_: str = DATETIME_FORMAT) -> str: @@ -748,3 +757,30 @@ def remove_url_auth(url: str) -> str: u = urlparse(url) auth = f'{u.username}:{u.password}@' return url.replace(auth, '') + + +def is_request_allowed(url: str, allow_internal: bool = False) -> bool: + """ + Test whether an HTTP request is allowed to be executed + + :param url: `str` of URL + :param allow_internal: `bool` of whether internal requests are + allowed (default `False`) + + :returns: `bool` of whether HTTP request execution is allowed + """ + + is_allowed = False + + u = urlparse(url) + + ip = socket.gethostbyname(u.hostname) + + is_private = ipaddress.ip_address(ip).is_private + + if not is_private: + is_allowed = True + if is_private and allow_internal: + is_allowed = True + + return is_allowed diff --git a/requirements-pubsub.txt b/requirements-pubsub.txt index 8579e8b22..1e32c725e 100644 --- a/requirements-pubsub.txt +++ b/requirements-pubsub.txt @@ -1 +1,2 @@ +kafka-python paho-mqtt diff --git 
a/setup.py b/setup.py index 161941e21..efd7dd99c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -84,26 +84,7 @@ def finalize_options(self): def run(self): import subprocess - errno = subprocess.call(['pytest', 'tests/test_api.py']) - raise SystemExit(errno) - - -class PyCoverage(Command): - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - import subprocess - - errno = subprocess.call(['coverage', 'run', '--source=pygeoapi', - '-m', 'unittest', - 'pygeoapi.tests.run_tests']) - errno = subprocess.call(['coverage', 'report', '-m']) + errno = subprocess.call(['pytest', 'tests/api/test_api.py']) raise SystemExit(errno) @@ -169,14 +150,12 @@ def get_package_version(): 'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Topic :: Scientific/Engineering :: GIS' ], cmdclass={ 'test': PyTest, - 'coverage': PyCoverage, 'cleanbuild': PyCleanBuild } ) diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 816fe8178..3236ce503 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -6,7 +6,7 @@ # Bernhard Mallinger # Francesco Bartoli # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2022 John A Stevenson and Colin Blackburn # Copyright (c) 2026 Francesco Bartoli # @@ -41,11 +41,11 @@ import pytest from pygeoapi.api import ( - API, APIRequest, CONFORMANCE_CLASSES, FORMAT_TYPES, F_HTML, F_JSON, - F_JSONLD, F_GZIP, __version__, validate_bbox, validate_datetime, - evaluate_limit, validate_subset, landing_page, openapi_, conformance, - describe_collections, 
get_collection_schema, -) + API, APIRequest, CONFORMANCE_CLASSES, __version__, validate_bbox, + validate_datetime, evaluate_limit, validate_subset, landing_page, openapi_, + conformance, describe_collections, get_collection_schema) + +from pygeoapi.formats import FORMAT_TYPES, F_GZIP, F_JSON, F_JSONLD, F_HTML from pygeoapi.util import yaml_load, get_api_rules, get_base_url from tests.util import (get_test_file_path, mock_api_request, mock_flask, @@ -79,6 +79,13 @@ def config_hidden_resources(): return yaml_load(fh) +@pytest.fixture() +def config_failing_collection(): + filename = 'pygeoapi-test-config-failing-collection.yml' + with open(get_test_file_path(filename)) as fh: + return yaml_load(fh) + + @pytest.fixture() def enclosure_api(config_enclosure, openapi): """ Returns an API instance with a collection with enclosure links. """ @@ -98,6 +105,11 @@ def api_hidden_resources(config_hidden_resources, openapi): return API(config_hidden_resources, openapi) +@pytest.fixture() +def api_failing_collection(config_failing_collection, openapi): + return API(config_failing_collection, openapi) + + def test_apirequest(api_): # Test without (valid) locales with pytest.raises(ValueError): @@ -508,7 +520,7 @@ def test_root(config, api_): for link in root['links']) assert any(link['href'].endswith('f=html') and link['rel'] == 'alternate' for link in root['links']) - assert len(root['links']) == 12 + assert len(root['links']) == 13 assert 'title' in root assert root['title'] == 'pygeoapi default instance' assert 'description' in root @@ -635,7 +647,11 @@ def test_describe_collections(config, api_): 'interval': [ ['2000-10-30T18:24:39+00:00', '2007-10-30T08:57:29+00:00'] ], - 'trs': 'http://www.opengis.net/def/uom/ISO-8601/0/Gregorian' + 'trs': 'http://www.opengis.net/def/uom/ISO-8601/0/Gregorian', + 'grid': { + 'resolution': 'P1D' + }, + 'default': '2000-10-30T18:24:39+00:00' } } @@ -725,6 +741,22 @@ def test_describe_collections_hidden_resources( assert 
len(collections['collections']) == 1 +def test_describe_collections_failing_collection( + config_failing_collection, api_failing_collection): + req = mock_api_request({}) + rsp_headers, code, response = describe_collections(api_failing_collection, req) # noqa + assert code == HTTPStatus.OK + + assert len(config_failing_collection['resources']) == 3 + + collections = json.loads(response) + assert len(collections['collections']) == 2 + + req = mock_api_request({}) + rsp_headers, code, response = describe_collections(api_failing_collection, req, 'cmip5') # noqa + assert code == HTTPStatus.INTERNAL_SERVER_ERROR + + def test_describe_collections_json_ld(config, api_): req = mock_api_request({'f': 'jsonld'}) rsp_headers, code, response = describe_collections(api_, req, 'obs') diff --git a/tests/api/test_environmental_data_retrieval.py b/tests/api/test_environmental_data_retrieval.py index 8d028a7c2..38ee4f852 100644 --- a/tests/api/test_environmental_data_retrieval.py +++ b/tests/api/test_environmental_data_retrieval.py @@ -41,6 +41,23 @@ from tests.util import mock_api_request +def test_describe_collection_edr(config, api_): + req = mock_api_request() + rsp_headers, code, response = describe_collections(api_, req, 'icoads-sst') + collection = json.loads(response) + parameter_names = list(collection['parameter_names'].keys()) + parameter_names.sort() + assert len(parameter_names) == 4 + assert parameter_names == ['AIRT', 'SST', 'UWND', 'VWND'] + + sst = collection['parameter_names']['SST'] + assert sst['id'] == 'SST' + assert sst['type'] == 'Parameter' + assert sst['observedProperty']['label']['en'] == 'SEA SURFACE TEMPERATURE' + assert sst['unit']['label']['en'] == 'SEA SURFACE TEMPERATURE' + assert sst['unit']['symbol']['value'] == 'Deg C' + + def test_get_collection_edr_query(config, api_): # edr resource req = mock_api_request() diff --git a/tests/api/test_itemtypes.py b/tests/api/test_itemtypes.py index dd3fb9431..e5fceaefc 100644 --- a/tests/api/test_itemtypes.py 
+++ b/tests/api/test_itemtypes.py @@ -5,7 +5,7 @@ # Colin Blackburn # Francesco Bartoli # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2022 John A Stevenson and Colin Blackburn # Copyright (c) 2025 Francesco Bartoli # @@ -42,12 +42,12 @@ import pyproj from shapely.geometry import Point -from pygeoapi.api import (API, FORMAT_TYPES, F_GZIP, F_HTML, F_JSONLD, - apply_gzip) +from pygeoapi.api import API, apply_gzip from pygeoapi.api.itemtypes import ( get_collection_queryables, get_collection_item, get_collection_items, manage_collection_item) from pygeoapi.crs import get_crs +from pygeoapi.formats import FORMAT_TYPES, F_GZIP, F_HTML, F_JSONLD from pygeoapi.util import yaml_load from tests.util import get_test_file_path, mock_api_request diff --git a/tests/api/test_processes.py b/tests/api/test_processes.py index a4bd3794f..894579afe 100644 --- a/tests/api/test_processes.py +++ b/tests/api/test_processes.py @@ -5,7 +5,7 @@ # Colin Blackburn # Bernhard Mallinger # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2022 John A Stevenson and Colin Blackburn # # Permission is hereby granted, free of charge, to any person @@ -37,12 +37,28 @@ import time from unittest import mock -from pygeoapi.api import FORMAT_TYPES, F_HTML, F_JSON +import pytest + +from pygeoapi.api import API from pygeoapi.api.processes import ( describe_processes, execute_process, delete_job, get_job_result, get_jobs ) +from pygeoapi.formats import FORMAT_TYPES, F_HTML, F_JSON +from pygeoapi.util import yaml_load + +from tests.util import get_test_file_path, mock_api_request + + +@pytest.fixture() +def config_process_metadata() -> dict: + """ Returns a pygeoapi configuration with process metadata.""" + with open(get_test_file_path('pygeoapi-test-config-process-metadata.yml')) as fh: # noqa + return yaml_load(fh) -from tests.util import mock_api_request + +@pytest.fixture() +def api_process_metadata(config_process_metadata, 
openapi): + return API(config_process_metadata, openapi) def test_describe_processes(config, api_): @@ -79,7 +95,7 @@ def test_describe_processes(config, api_): assert process['title'] == 'Hello World' assert len(process['keywords']) == 3 assert len(process['links']) == 6 - assert len(process['inputs']) == 2 + assert len(process['inputs']) == 4 assert len(process['outputs']) == 1 assert len(process['outputTransmission']) == 1 assert len(process['jobControlOptions']) == 2 @@ -143,8 +159,8 @@ def test_describe_processes(config, api_): # Test describe doesn't crash if example is missing req = mock_api_request() - processor = api_.manager.get_processor("hello-world") - example = processor.metadata.pop("example") + processor = api_.manager.get_processor('hello-world') + example = processor.metadata.pop('example') rsp_headers, code, response = describe_processes(api_, req) processor.metadata['example'] = example data = json.loads(response) @@ -152,6 +168,23 @@ def test_describe_processes(config, api_): assert len(data['processes']) == 2 +def test_describe_processes_metadata(config_process_metadata, + api_process_metadata): + + req = mock_api_request({'limit': 1}) + # Test for description of single processes + rsp_headers, code, response = describe_processes( + api_process_metadata, req, 'echo') + data = json.loads(response) + assert code == HTTPStatus.OK + assert len(data['jobControlOptions']) == 2 + assert 'sync-execute' in data['jobControlOptions'] + assert 'async-execute' in data['jobControlOptions'] + assert len(data['outputTransmission']) == 2 + assert 'value' in data['outputTransmission'] + assert 'reference' in data['outputTransmission'] + + def test_execute_process(config, api_): req_body_0 = { 'inputs': { @@ -209,6 +242,18 @@ def test_execute_process(config, api_): 'name': 'Test document' } } + req_body_10 = { + 'inputs': { + 'name': 'Test document as bytes response', + 'as_bytes': True + } + } + req_body_11 = { + 'inputs': { + 'name': 'Test document as 
text/plain media type', + 'media_type': 'text/plain' + } + } cleanup_jobs = set() @@ -377,6 +422,19 @@ def test_execute_process(config, api_): response2 = '{"id":"echo","value":"Hello Test document!"}' assert response == response2 + req = mock_api_request(data=req_body_10) + rsp_headers, code, response = execute_process(api_, req, 'hello-world') + + response2 = '{"id":"echo","value":"Hello Test document as bytes response!"}' # noqa + assert response == response2 + + req = mock_api_request(data=req_body_11) + rsp_headers, code, response = execute_process(api_, req, 'hello-world') + + assert rsp_headers['Content-Type'] == 'text/plain' + response2 = '{"id":"echo","value":"Hello Test document as text/plain media type!"}' # noqa + assert response == response2 + # Cleanup time.sleep(2) # Allow time for any outstanding async jobs for _, job_id in cleanup_jobs: diff --git a/tests/api/test_pubsub.py b/tests/api/test_pubsub.py index 243c4661d..9226b97dd 100644 --- a/tests/api/test_pubsub.py +++ b/tests/api/test_pubsub.py @@ -54,7 +54,7 @@ def test_landing_page(config, openapi, asyncapi): content = json.loads(response) - assert len(content['links']) == 15 + assert len(content['links']) == 16 for link in content['links']: if link.get('rel') == 'hub': @@ -76,7 +76,7 @@ def test_landing_page(config, openapi, asyncapi): content = json.loads(response) - assert len(content['links']) == 12 + assert len(content['links']) == 13 for link in content['links']: if link.get('rel') == 'hub': @@ -96,7 +96,7 @@ def test_landing_page(config, openapi, asyncapi): content = json.loads(response) - assert len(content['links']) == 15 + assert len(content['links']) == 16 for link in content['links']: if link.get('rel') == 'hub': diff --git a/tests/api/test_stac.py b/tests/api/test_stac.py index dea4de7bb..1da63919b 100644 --- a/tests/api/test_stac.py +++ b/tests/api/test_stac.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # 
Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -31,8 +31,8 @@ import pytest -from pygeoapi.api import FORMAT_TYPES, F_JSON from pygeoapi.api.stac import search, landing_page +from pygeoapi.formats import FORMAT_TYPES, F_JSON from pygeoapi.util import yaml_load from tests.util import get_test_file_path, mock_api_request diff --git a/tests/api/test_tiles.py b/tests/api/test_tiles.py index d804f6a21..c463abf10 100644 --- a/tests/api/test_tiles.py +++ b/tests/api/test_tiles.py @@ -5,7 +5,7 @@ # Colin Blackburn # Bernhard Mallinger # -# Copyright (c) 2024 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2022 John A Stevenson and Colin Blackburn # Copyright (c) 2025 Joana Simoes # @@ -37,12 +37,12 @@ from http import HTTPStatus import pytest -from pygeoapi.api import FORMAT_TYPES, F_HTML from pygeoapi.api.tiles import ( get_collection_tiles, tilematrixset, tilematrixsets, get_collection_tiles_metadata, get_collection_tiles_data ) +from pygeoapi.formats import FORMAT_TYPES, F_HTML from pygeoapi.models.provider.base import TileMatrixSetEnum from tests.util import mock_api_request diff --git a/tests/data/parquet/geoparquet1.1/data-polygon-encoding_wkb_no_bbox.parquet b/tests/data/parquet/geoparquet1.1/data-polygon-encoding_wkb_no_bbox.parquet new file mode 100644 index 000000000..cce77baad Binary files /dev/null and b/tests/data/parquet/geoparquet1.1/data-polygon-encoding_wkb_no_bbox.parquet differ diff --git a/tests/data/parquet/geoparquet1.1/nyc_subset_overture.parquet b/tests/data/parquet/geoparquet1.1/nyc_subset_overture.parquet new file mode 100644 index 000000000..9abe3f071 Binary files /dev/null and b/tests/data/parquet/geoparquet1.1/nyc_subset_overture.parquet differ diff --git a/tests/data/random.parquet b/tests/data/parquet/naive/random.parquet similarity index 100% rename from tests/data/random.parquet rename to tests/data/parquet/naive/random.parquet diff --git 
a/tests/data/random_nocrs.parquet b/tests/data/parquet/naive/random_nocrs.parquet similarity index 100% rename from tests/data/random_nocrs.parquet rename to tests/data/parquet/naive/random_nocrs.parquet diff --git a/tests/data/random_nogeom.parquet b/tests/data/parquet/naive/random_nogeom.parquet similarity index 100% rename from tests/data/random_nogeom.parquet rename to tests/data/parquet/naive/random_nogeom.parquet diff --git a/tests/formatter/test_csv__formatter.py b/tests/formatter/test_csv__formatter.py index c01e23c24..31036e180 100644 --- a/tests/formatter/test_csv__formatter.py +++ b/tests/formatter/test_csv__formatter.py @@ -27,12 +27,17 @@ # # ================================================================= -import csv -import io +from csv import DictReader +from io import StringIO +import json + import pytest +from pygeoapi.formatter.base import FormatterSerializationError from pygeoapi.formatter.csv_ import CSVFormatter +from ..util import get_test_file_path + @pytest.fixture() def fixture(): @@ -57,13 +62,87 @@ def fixture(): return data +@pytest.fixture +def point_coverage_data(): + data = { + 'type': 'Coverage', + 'domain': { + 'type': 'Domain', + 'domainType': 'PointSeries', + 'axes': { + 'x': {'values': [-10.1]}, + 'y': {'values': [-40.2]}, + 't': {'values': [ + '2013-01-01', '2013-01-02', '2013-01-03', + '2013-01-04', '2013-01-05', '2013-01-06']} + } + }, + 'parameters': { + 'PSAL': { + 'type': 'Parameter', + 'description': {'en': 'The measured salinity'}, + 'unit': {'symbol': 'psu'}, + 'observedProperty': { + 'id': 'http://vocab.nerc.ac.uk/standard_name/sea_water_salinity/', # noqa + 'label': {'en': 'Sea Water Salinity'} + } + } + }, + 'ranges': { + 'PSAL': { + 'axisNames': ['t'], + 'shape': [6], + 'values': [ + 43.9599, 43.9599, 43.9640, 43.9640, 43.9679, 43.987 + ] + } + } + } + + return data + + +@pytest.fixture +def data(): + data_path = get_test_file_path('data/items.geojson') + with open(data_path, 'r', encoding='utf-8') as fh: + return 
json.load(fh) + + +@pytest.fixture(scope='function') +def csv_reader_geom_enabled(data): + """csv_reader with geometry enabled""" + formatter = CSVFormatter({'geom': True}) + output = formatter.write(data=data) + return DictReader(StringIO(output.decode('utf-8'))) + + +@pytest.fixture +def invalid_geometry_data(): + return { + 'features': [ + { + 'id': 1, + 'type': 'Feature', + 'properties': { + 'id': 1, + 'title': 'Invalid Point Feature' + }, + 'geometry': { + 'type': 'Point', + 'coordinates': [-130.44472222222223] + } + } + ] + } + def test_csv__formatter(fixture): f = CSVFormatter({'geom': True}) f_csv = f.write(data=fixture) - buffer = io.StringIO(f_csv.decode('utf-8')) - reader = csv.DictReader(buffer) + buffer = StringIO(f_csv.decode('utf-8')) + reader = DictReader(buffer) header = list(reader.fieldnames) @@ -80,3 +159,89 @@ def test_csv__formatter(fixture): assert data['id'] == '1972' assert data['foo'] == 'bar' assert data['title'] == '' + + +def test_write_with_geometry_enabled(csv_reader_geom_enabled): + """Test CSV output with geometry enabled""" + rows = list(csv_reader_geom_enabled) + + # Verify the header + header = list(csv_reader_geom_enabled.fieldnames) + assert len(header) == 4 + + # Verify number of rows + assert len(rows) == 9 + + +def test_write_without_geometry(data): + formatter = CSVFormatter({'geom': False}) + output = formatter.write(data=data) + csv_reader = DictReader(StringIO(output.decode('utf-8'))) + + """Test CSV output with geometry disabled""" + rows = list(csv_reader) + + # Verify headers don't include geometry + headers = csv_reader.fieldnames + assert 'geometry' not in headers + + # Verify data + first_row = rows[0] + assert first_row['uri'] == \ + 'http://localhost:5000/collections/objects/items/1' + assert first_row['name'] == 'LineString' + + +def test_write_empty_features(): + """Test handling of empty feature collection""" + formatter = CSVFormatter({'geom': True}) + data = { + 'features': [] + } + output = 
formatter.write(data=data) + assert output == '' + + +@pytest.mark.parametrize( + 'row_index,expected_wkt', + [ + (2, 'POINT (-85 33)'), + (3, 'MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))'), # noqa + (4, 'POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))'), + (5, 'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))'), # noqa + (6, 'MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))') # noqa + ] +) +def test_wkt(csv_reader_geom_enabled, row_index, expected_wkt): + """Test CSV output of multi-point geometry""" + rows = list(csv_reader_geom_enabled) + + # Verify data + geometry_row = rows[row_index] + assert geometry_row['wkt'] == expected_wkt + + +def test_invalid_geometry_data(invalid_geometry_data): + formatter = CSVFormatter({'geom': True}) + with pytest.raises(FormatterSerializationError): + formatter.write(data=invalid_geometry_data) + + +def test_point_coverage_csv(point_coverage_data): + """Test CSV output of point coverage data""" + formatter = CSVFormatter({'geom': True}) + output = formatter.write(data=point_coverage_data) + csv_reader = DictReader(StringIO(output.decode('utf-8'))) + rows = list(csv_reader) + + # Verify number of rows + assert len(rows) == 6 + + # Verify data + first_row = rows[0] + assert first_row['parameter'] == 'PSAL' + assert first_row['datetime'] == '2013-01-01' + assert first_row['value'] == '43.9599' + assert first_row['unit'] == 'psu' + assert first_row['x'] == '-10.1' + assert first_row['y'] == '-40.2' diff --git a/tests/other/test_ogr_capabilities.py b/tests/other/test_ogr_capabilities.py index eb547d571..3f7a3848f 100644 --- a/tests/other/test_ogr_capabilities.py +++ b/tests/other/test_ogr_capabilities.py @@ -1,8 +1,10 @@ # ================================================================= # # Authors: Just van den Broecke +# Tom Kralidis # # Copyright (c) 2023 Just van den Broecke +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free 
of charge, to any person # obtaining a copy of this software and associated documentation @@ -50,7 +52,6 @@ def get_axis_order(coords): def test_transforms(): version_num = int(gdal.VersionInfo('VERSION_NUM')) assert version_num > 3000000, f'GDAL version={version_num} must be > 3.0.0' - print(f'GDAL Version num = {version_num}') pyproj.show_versions() FORCE_LON_LAT = osr.OAMS_TRADITIONAL_GIS_ORDER @@ -68,7 +69,6 @@ def test_transforms(): } for crs in CRS_DICT: - print(f'Testing CRS={crs}') crs_entry = CRS_DICT[crs] source = get_spatial_ref(28992, AUTH_COMPLIANT) target = get_spatial_ref(crs_entry['epsg'], crs_entry['mapping']) @@ -85,7 +85,6 @@ def test_transforms(): axis_order = get_axis_order(result) # Axis order should match that of CRS - print(f'Transform result={result} Axis order={axis_order}') crs_axis_order = crs_entry['order'] assert axis_order == crs_axis_order, f'Axis order for {crs} after Transform should be {crs_axis_order} result={result}' # noqa @@ -106,7 +105,6 @@ def test_transforms(): # Determine Axis order after ExportToJson coords = json_feature['geometry']['coordinates'] axis_order = get_axis_order(coords) - print(f'ExportToJson result={coords} Axis order={axis_order}') assert axis_order == crs_axis_order, f'Axis order for {crs} after ExportToJson should be {crs_axis_order} coords={coords}' # noqa diff --git a/tests/other/test_util.py b/tests/other/test_util.py index 4e54840c6..df9ea2f57 100644 --- a/tests/other/test_util.py +++ b/tests/other/test_util.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -33,6 +33,7 @@ from io import StringIO from unittest import mock import uuid +from xml.sax.saxutils import unescape import pytest @@ -74,6 +75,24 @@ def test_get_typed_value(): assert isinstance(value, bool) 
+@pytest.mark.parametrize('data,minified,pretty_printed', [ + [{'foo': 'bar'}, '{"foo":"bar"}', '{\n "foo":"bar"\n}'], + [{'foo': 'bar'}, + '{"foo<script>alert(\\"hi\\")</script>":"bar"}', + '{\n "foo<script>alert(\\"hi\\")</script>":"bar"\n}'] +]) +def test_to_json(data, minified, pretty_printed): + output = util.to_json(data) + assert output == minified + assert util.to_json(data, pretty=True) == pretty_printed + + unescaped_output = unescape(output) + if '<' in output: + assert '<' in unescaped_output + if '>' in output: + assert '>' in unescaped_output + + def test_yaml_load(config): assert isinstance(config, dict) with pytest.raises(FileNotFoundError): @@ -310,3 +329,21 @@ def test_get_choice_from_headers(): 'accept') == 'application/ld+json' assert util.get_choice_from_headers( {'accept-language': 'en_US', 'accept': '*/*'}, 'accept') == '*/*' + + +@pytest.mark.parametrize('url,allow_internal,result', [ + ['http://127.0.0.1/test', False, False], + ['http://127.0.0.1/test', True, True], + ['http://192.168.0.12/test', False, False], + ['http://192.168.0.12/test', True, True], + ['http://169.254.0.11/test', False, False], + ['http://169.254.0.11/test', True, True], + ['http://0.0.0.0/test', True, True], + ['http://0.0.0.0/test', False, False], + ['http://localhost:5000/test', False, False], + ['http://localhost:5000/test', True, True], + ['https://pygeoapi.io', False, True], + ['https://pygeoapi.io', True, True] +]) +def test_is_request_allowed(url, allow_internal, result): + assert util.is_request_allowed(url, allow_internal) is result diff --git a/tests/provider/test_csw_provider.py b/tests/provider/test_csw_provider.py index 129a47f2a..5e0232408 100644 --- a/tests/provider/test_csw_provider.py +++ b/tests/provider/test_csw_provider.py @@ -29,11 +29,14 @@ # # ================================================================= +from unittest import mock import pytest from pygeoapi.provider.base import ProviderItemNotFoundError from pygeoapi.provider.csw_facade 
import CSWFacadeProvider +CSW_PROVIDER = 'pygeoapi.provider.csw_facade.CatalogueServiceWeb' + @pytest.fixture() def config(): @@ -46,14 +49,166 @@ def config(): } -def test_domains(config): +@pytest.fixture() +def mock_csw_record(): + """Mock owslib CSW record""" + record = mock.MagicMock() + record.identifier = 'urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f' + record.title = 'Lorem ipsum' + record.abstract = 'Lorem ipsum dolor sit amet' + record.type = 'http://purl.org/dc/dcmitype/Image' + record.subjects = ['Tourism--Greece'] + record.date = '2006-03-26' + record.created = None + record.modified = None + record.rights = None + record.language = None + record.bbox = None # No geometry for first record + record.references = [] + record.uris = [] + return record + + +@pytest.fixture() +def mock_csw_record_polygon(): + """Mock owslib CSW record with polygon geometry""" + record = mock.MagicMock() + record.identifier = 'urn:uuid:1ef30a8b-876d-4828-9246-c37ab4510bbd' + record.title = 'Maecenas enim' + record.abstract = 'Maecenas enim' + record.type = 'http://purl.org/dc/dcmitype/Text' + record.subjects = [] + record.date = '2006-05-12' + record.created = None + record.modified = None + record.rights = None + record.language = None + record.bbox = mock.MagicMock() + record.bbox.minx = '13.754' + record.bbox.miny = '60.042' + record.bbox.maxx = '15.334' + record.bbox.maxy = '61.645' + record.references = [] + record.uris = [] + return record + + +@pytest.fixture() +def mock_csw_get_record(): + """Mock owslib CSW record for get operations""" + record = mock.MagicMock() + record.identifier = 'urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2' + record.title = 'Lorem ipsum dolor sit amet' + record.abstract = 'Lorem ipsum dolor sit amet' + record.type = 'http://purl.org/dc/dcmitype/Image' + record.subjects = [] + record.date = None + record.created = None + record.modified = None + record.rights = None + record.language = None + record.bbox = None + record.references = [] + 
record.uris = [] + return record + + +@pytest.fixture() +def mock_csw(mock_csw_record, mock_csw_record_polygon, mock_csw_get_record): + """Mock CSW service""" + with mock.patch(CSW_PROVIDER) as mock_csw_class: + csw_instance = mock.MagicMock() + mock_csw_class.return_value = csw_instance + + def mock_getrecords2(*args, **kwargs): + # Simulate different responses based on parameters + limit = kwargs.get('maxrecords', 10) + offset = kwargs.get('startposition', 0) + constraints = kwargs.get('constraints', []) + + # All available records + all_records = [ + ( + 'urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f', + mock_csw_record + ), + ( + 'urn:uuid:1ef30a8b-876d-4828-9246-c37ab4510bbd', + mock_csw_record_polygon + ) + ] + + # Simulate filtering based on query constraints + filtered_records = all_records[:] + + # Simulate different total counts based on constraints + total_matches = 12 # Default total + if constraints: + # If there are constraints + # simulate fewer matches + constraint_str = str(constraints) + if 'lorem' in constraint_str.lower(): + total_matches = 5 + # Keep both records for lorem search + elif 'maecenas' in constraint_str.lower(): + total_matches = 1 + # Keep only the second record for maecenas search + filtered_records = [all_records[1]] + elif 'datetime' in constraint_str.lower(): + total_matches = 1 if '2006-05-12' in constraint_str else 3 + # Keep appropriate records based on date + if '2006-05-12' in constraint_str: + # Second record has matching date + filtered_records = [all_records[1]] + + # Apply offset and limit to filtered records + paginated_records = filtered_records[offset:offset+limit] + + # Convert to dictionary format expected by CSW + csw_instance.records = { + record_id: record for record_id, record in paginated_records + } + csw_instance.results = { + 'matches': total_matches, + 'returned': len(paginated_records) + } + + def mock_getrecordbyid(identifiers, **kwargs): + identifier = identifiers[0] + if identifier == 
'urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2': + csw_instance.records = {identifier: mock_csw_get_record} + else: + csw_instance.records = {} + + def mock_getdomain(property_name, **kwargs): + # Mock domain values for testing + domain_values = { + 'type': [ + 'http://purl.org/dc/dcmitype/Image', + 'http://purl.org/dc/dcmitype/Text', + 'http://purl.org/dc/dcmitype/Dataset', + 'http://purl.org/dc/dcmitype/Service' + ] + } + csw_instance.results = { + 'values': domain_values.get(property_name, []) + } + + csw_instance.getrecords2.side_effect = mock_getrecords2 + csw_instance.getrecordbyid.side_effect = mock_getrecordbyid + csw_instance.getdomain.side_effect = mock_getdomain + + yield csw_instance + + +def test_domains(config, mock_csw): p = CSWFacadeProvider(config) domains, current = p.get_domains() assert current - expected_properties = ['description', 'keywords', 'title', 'type'] + expected_properties = ['date', 'description', 'keywords', 'title', 'type'] assert sorted(domains.keys()) == expected_properties @@ -66,7 +221,7 @@ def test_domains(config): assert list(domains.keys()) == ['type'] -def test_query(config): +def test_query(config, mock_csw): p = CSWFacadeProvider(config) fields = p.get_fields() @@ -76,9 +231,9 @@ def test_query(config): assert value['type'] == 'string' results = p.query() - assert len(results['features']) == 10 + assert len(results['features']) == 2 # Mock returns 2 records assert results['numberMatched'] == 12 - assert results['numberReturned'] == 10 + assert results['numberReturned'] == 2 assert results['features'][0]['id'] == 'urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f' # noqa assert results['features'][0]['geometry'] is None assert results['features'][0]['properties']['title'] == 'Lorem ipsum' @@ -92,20 +247,11 @@ def test_query(config): assert len(results['features']) == 1 assert results['features'][0]['id'] == 'urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f' # noqa - results = p.query(offset=2, limit=1) + results = 
p.query(offset=1, limit=1) assert len(results['features']) == 1 assert results['features'][0]['id'] == 'urn:uuid:1ef30a8b-876d-4828-9246-c37ab4510bbd' # noqa - assert len(results['features'][0]['properties']) == 2 - - results = p.query(q='lorem') - assert results['numberMatched'] == 5 - - results = p.query(q='lorem', sortby=[{'property': 'title', 'order': '-'}]) - assert results['numberMatched'] == 5 - results = p.query(resulttype='hits') - assert len(results['features']) == 0 assert results['numberMatched'] == 12 results = p.query(bbox=[-10, 40, 0, 60]) @@ -115,23 +261,10 @@ def test_query(config): assert len(results['features']) == 2 results = p.query(properties=[('title', 'Maecenas enim')]) - assert len(results['features']) == 1 - - properties = [ - ('title', 'Maecenas enim'), - ('type', 'http://purl.org/dc/dcmitype/Text') - ] - results = p.query(properties=properties) - assert len(results['features']) == 1 - - results = p.query(datetime_='2006-05-12') - assert len(results['features']) == 1 - - results = p.query(datetime_='2004/2007') - assert len(results['features']) == 3 + assert len(results['features']) == 2 -def test_get(config): +def test_get(config, mock_csw): p = CSWFacadeProvider(config) result = p.get('urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2') @@ -146,7 +279,7 @@ def test_get(config): assert 'service=CSW' in xml_link['href'] -def test_get_not_existing_item_raise_exception(config): +def test_get_not_existing_item_raise_exception(config, mock_csw): """Testing query for a not existing object""" p = CSWFacadeProvider(config) with pytest.raises(ProviderItemNotFoundError): diff --git a/tests/provider/test_csw_provider_live.py b/tests/provider/test_csw_provider_live.py new file mode 100644 index 000000000..129a47f2a --- /dev/null +++ b/tests/provider/test_csw_provider_live.py @@ -0,0 +1,153 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# Francesco Bartoli +# +# Copyright (c) 2025 Tom Kralidis +# 
Copyright (c) 2025 Francesco Bartoli +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# ================================================================= + +import pytest + +from pygeoapi.provider.base import ProviderItemNotFoundError +from pygeoapi.provider.csw_facade import CSWFacadeProvider + + +@pytest.fixture() +def config(): + return { + 'name': 'CSWFacade', + 'type': 'record', + 'data': 'https://demo.pycsw.org/cite/csw', + 'id_field': 'identifier', + 'time_field': 'date' + } + + +def test_domains(config): + p = CSWFacadeProvider(config) + + domains, current = p.get_domains() + + assert current + + expected_properties = ['description', 'keywords', 'title', 'type'] + + assert sorted(domains.keys()) == expected_properties + + assert len(domains['type']) == 4 + + domains, current = p.get_domains(['type']) + + assert current + + assert list(domains.keys()) == ['type'] + + +def test_query(config): + p = CSWFacadeProvider(config) + + fields = p.get_fields() + assert len(fields) == 9 + + for key, value in fields.items(): + assert value['type'] == 'string' + + results = p.query() + assert len(results['features']) == 10 + assert results['numberMatched'] == 12 + assert results['numberReturned'] == 10 + assert results['features'][0]['id'] == 'urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f' # noqa + assert results['features'][0]['geometry'] is None + assert results['features'][0]['properties']['title'] == 'Lorem ipsum' + assert results['features'][0]['properties']['keywords'][0] == 'Tourism--Greece' # noqa + + assert results['features'][1]['geometry']['type'] == 'Polygon' + assert results['features'][1]['geometry']['coordinates'][0][0][0] == 13.754 + assert results['features'][1]['geometry']['coordinates'][0][0][1] == 60.042 + + results = p.query(limit=1) + assert len(results['features']) == 1 + assert results['features'][0]['id'] == 'urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f' # noqa + + results = p.query(offset=2, limit=1) + assert len(results['features']) == 1 + assert results['features'][0]['id'] == 
'urn:uuid:1ef30a8b-876d-4828-9246-c37ab4510bbd' # noqa + + assert len(results['features'][0]['properties']) == 2 + + results = p.query(q='lorem') + assert results['numberMatched'] == 5 + + results = p.query(q='lorem', sortby=[{'property': 'title', 'order': '-'}]) + assert results['numberMatched'] == 5 + + results = p.query(resulttype='hits') + assert len(results['features']) == 0 + assert results['numberMatched'] == 12 + + results = p.query(bbox=[-10, 40, 0, 60]) + assert len(results['features']) == 2 + + results = p.query(bbox=[-10, 40, 0, 60, 0, 0]) + assert len(results['features']) == 2 + + results = p.query(properties=[('title', 'Maecenas enim')]) + assert len(results['features']) == 1 + + properties = [ + ('title', 'Maecenas enim'), + ('type', 'http://purl.org/dc/dcmitype/Text') + ] + results = p.query(properties=properties) + assert len(results['features']) == 1 + + results = p.query(datetime_='2006-05-12') + assert len(results['features']) == 1 + + results = p.query(datetime_='2004/2007') + assert len(results['features']) == 3 + + +def test_get(config): + p = CSWFacadeProvider(config) + + result = p.get('urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2') + assert result['id'] == 'urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2' + assert result['geometry'] is None + assert result['properties']['title'] == 'Lorem ipsum dolor sit amet' + assert result['properties']['type'] == 'http://purl.org/dc/dcmitype/Image' + + xml_link = result['links'][0] + assert xml_link['rel'] == 'alternate' + assert xml_link['type'] == 'application/xml' + assert 'service=CSW' in xml_link['href'] + + +def test_get_not_existing_item_raise_exception(config): + """Testing query for a not existing object""" + p = CSWFacadeProvider(config) + with pytest.raises(ProviderItemNotFoundError): + p.get('404') diff --git a/tests/provider/test_filesystem_provider.py b/tests/provider/test_filesystem_provider.py index 824c23eb9..37208acb6 100644 --- a/tests/provider/test_filesystem_provider.py +++ 
b/tests/provider/test_filesystem_provider.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2021 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -30,6 +30,7 @@ import os import pytest +from pygeoapi.provider.base import ProviderInvalidQueryError from pygeoapi.provider.filesystem import FileSystemProvider THISDIR = os.path.dirname(os.path.realpath(__file__)) @@ -54,7 +55,7 @@ def test_query(config): r = p.get_data_path(baseurl, urlpath, dirpath) - assert len(r['links']) == 13 + assert len(r['links']) == 14 r = p.get_data_path(baseurl, urlpath, '/poi_portugal') @@ -73,3 +74,6 @@ def test_query(config): 'osm_id': 'int' } assert r['assets']['default']['href'] == 'http://example.org/stac/poi_portugal.gpkg' # noqa + + with pytest.raises(ProviderInvalidQueryError): + _ = p.get_data_path(baseurl, urlpath, '../../poi_portugal') diff --git a/tests/provider/test_mysql_provider.py b/tests/provider/test_mysql_provider.py index 0f470d750..c92ec01fd 100644 --- a/tests/provider/test_mysql_provider.py +++ b/tests/provider/test_mysql_provider.py @@ -37,44 +37,45 @@ PASSWORD = os.environ.get('MYSQL_PASSWORD', 'mysql') -""" -For local testing, a MySQL database can be spun up with docker -compose as follows: - -services: - - mysql: - image: mysql:8 - ports: - - 3306:3306 - environment: - MYSQL_ROOT_PASSWORD: mysql - MYSQL_USER: pygeoapi - MYSQL_PASSWORD: mysql - MYSQL_DATABASE: test_geo_app - volumes: - - ./tests/data/mysql_data.sql:/docker-entrypoint-initdb.d/init.sql:ro -""" - - -@pytest.fixture() -def config(): - return { +# Testing local MySQL with docker: +''' +docker run --name mysql-test \ + -e MYSQL_ROOT_PASSWORD=mysql \ + -e MYSQL_USER=pygeoapi \ + -e MYSQL_PASSWORD=mysql \ + -e MYSQL_DATABASE=test_geo_app \ + -p 3306:3306 \ + -v ./tests/data/mysql_data.sql:/docker-entrypoint-initdb.d/init.sql:ro \ + -d mysql:8 +''' + + 
+@pytest.fixture(params=['default', 'connection_string']) +def config(request): + config_ = { 'name': 'MySQL', 'type': 'feature', - 'data': { + 'options': {'connect_timeout': 10}, + 'id_field': 'locationID', + 'table': 'location', + 'geom_field': 'locationCoordinates' + } + if request.param == 'default': + config_['data'] = { 'host': 'localhost', 'dbname': 'test_geo_app', 'user': 'root', 'port': 3306, 'password': PASSWORD, 'search_path': ['test_geo_app'] - }, - 'options': {'connect_timeout': 10}, - 'id_field': 'locationID', - 'table': 'location', - 'geom_field': 'locationCoordinates' - } + } + elif request.param == 'connection_string': + config_['data'] = ( + f'mysql+pymysql://root:{PASSWORD}@localhost:3306/test_geo_app' + ) + config_['options']['search_path'] = ['test_geo_app'] + + return config_ def test_valid_connection_options(config): @@ -87,7 +88,8 @@ def test_valid_connection_options(config): 'keepalives', 'keepalives_idle', 'keepalives_count', - 'keepalives_interval' + 'keepalives_interval', + 'search_path' ] @@ -164,6 +166,19 @@ def test_query_skip_geometry(config): assert feature['geometry'] is None +def test_get_with_injection(config): + """Testing query for injection attack string""" + p = MySQLProvider(config) + feature = p.get('1') + assert feature.get('type') == 'Feature' + + with pytest.raises(ProviderItemNotFoundError): + p.get('1; DROP TABLE location;') + + with pytest.raises(ProviderItemNotFoundError): + p.get('1') + + def test_get_not_existing_item_raise_exception(config): """Testing query for a not existing object""" p = MySQLProvider(config) diff --git a/tests/provider/test_parquet_provider.py b/tests/provider/test_parquet_provider.py index 736e3dff4..6d45a51d7 100644 --- a/tests/provider/test_parquet_provider.py +++ b/tests/provider/test_parquet_provider.py @@ -5,6 +5,7 @@ # # Copyright (c) 2024 Leo Ghignone # Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Colton Loftus # # Permission is hereby granted, free of charge, to any person # 
obtaining a copy of this software and associated documentation @@ -29,6 +30,8 @@ # # ================================================================= +from copy import copy + import pytest from pygeoapi.provider.base import ProviderItemNotFoundError @@ -36,15 +39,6 @@ from ..util import get_test_file_path -path = get_test_file_path( - 'data/random.parquet') - -path_nogeom = get_test_file_path( - 'data/random_nogeom.parquet') - -path_nocrs = get_test_file_path( - 'data/random_nocrs.parquet') - @pytest.fixture() def config_parquet(): @@ -52,13 +46,12 @@ def config_parquet(): 'name': 'Parquet', 'type': 'feature', 'data': { - 'source_type': 'Parquet', - 'source': path, + 'source': get_test_file_path('data/parquet/naive/random.parquet'), }, 'id_field': 'id', 'time_field': 'time', 'x_field': 'lon', - 'y_field': 'lat', + 'y_field': 'lat' } @@ -68,8 +61,8 @@ def config_parquet_nogeom_notime(): 'name': 'ParquetNoGeomNoTime', 'type': 'feature', 'data': { - 'source_type': 'Parquet', - 'source': path_nogeom, + 'source': get_test_file_path( + 'data/parquet/naive/random_nogeom.parquet') }, 'id_field': 'id' } @@ -81,162 +74,267 @@ def config_parquet_nocrs(): 'name': 'ParquetNoCrs', 'type': 'feature', 'data': { - 'source_type': 'Parquet', - 'source': path_nocrs, + 'source': get_test_file_path( + 'data/parquet/naive/random_nocrs.parquet') }, 'id_field': 'id', 'time_field': 'time', 'x_field': 'lon', - 'y_field': 'lat', + 'y_field': 'lat' + } + + +@pytest.fixture +def geoparquet_no_bbox(): + # Data originating from + # https://github.com/opengeospatial/geoparquet/blob/main/test_data/data-polygon-encoding_wkb.parquet + + # As CSV: + # "col","geometry" + # 0,"POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))" + # 1,"POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))" # noqa + # 2,"POLYGON EMPTY" + # 3, + return { + 'name': 'GeoparquetNoBbox', + 'type': 'feature', + 'data': { + 'source': get_test_file_path( + 
'data/parquet/geoparquet1.1/data-polygon-encoding_wkb_no_bbox.parquet' # noqa + ) + } + } + + +@pytest.fixture +def geoparquet_with_bbox(): + # Geneated with the overture python CLI + # overturemaps download --bbox=-74,40.98,-73.98,41 -f geoparquet --type=building -o nyc_subset_overture.parquet # noqa + return { + 'name': 'GeoparquetWithBbox', + 'type': 'feature', + 'data': { + 'source': get_test_file_path( + 'data/parquet/geoparquet1.1/nyc_subset_overture.parquet' + ) + } } -def test_get_fields(config_parquet): - """Testing field types""" - - p = ParquetProvider(config_parquet) - results = p.get_fields() - assert results['lat']['type'] == 'number' - assert results['lon']['format'] == 'double' - assert results['time']['format'] == 'date-time' - - -def test_get(config_parquet): - """Testing query for a specific object""" - - p = ParquetProvider(config_parquet) - result = p.get('42') - assert result['id'] == '42' - assert result['properties']['lon'] == 4.947447 - - -def test_get_not_existing_feature_raise_exception( - config_parquet -): - """Testing query for a not existing object""" - p = ParquetProvider(config_parquet) - with pytest.raises(ProviderItemNotFoundError): - p.get(-1) - - -def test_query_hits(config_parquet): - """Testing query on entire collection for hits""" - - p = ParquetProvider(config_parquet) - feature_collection = p.query(resulttype='hits') - assert feature_collection.get('type') == 'FeatureCollection' - features = feature_collection.get('features') - assert len(features) == 0 - hits = feature_collection.get('numberMatched') - assert hits is not None - assert hits == 100 - - -def test_query_bbox_hits(config_parquet): - """Testing query for a valid JSON object with geometry""" - - p = ParquetProvider(config_parquet) - feature_collection = p.query( - bbox=[100, -50, 150, 0], - resulttype='hits') - assert feature_collection.get('type') == 'FeatureCollection' - features = feature_collection.get('features') - assert len(features) == 0 - hits = 
feature_collection.get('numberMatched') - assert hits is not None - assert hits == 6 - - -def test_query_with_limit(config_parquet): - """Testing query for a valid JSON object with geometry""" - - p = ParquetProvider(config_parquet) - feature_collection = p.query(limit=2, resulttype='results') - assert feature_collection.get('type') == 'FeatureCollection' - features = feature_collection.get('features') - assert len(features) == 2 - hits = feature_collection.get('numberMatched') - assert hits > 2 - feature = features[0] - properties = feature.get('properties') - assert properties is not None - geometry = feature.get('geometry') - assert geometry is not None - - -def test_query_with_offset(config_parquet): - """Testing query for a valid JSON object with geometry""" - - p = ParquetProvider(config_parquet) - feature_collection = p.query(offset=20, limit=10, resulttype='results') - assert feature_collection.get('type') == 'FeatureCollection' - features = feature_collection.get('features') - assert len(features) == 10 - hits = feature_collection.get('numberMatched') - assert hits > 30 - feature = features[0] - properties = feature.get('properties') - assert properties is not None - assert feature['id'] == '21' - assert properties['lat'] == 66.264988 - geometry = feature.get('geometry') - assert geometry is not None - - -def test_query_with_property(config_parquet): - """Testing query for a valid JSON object with property filter""" - - p = ParquetProvider(config_parquet) - feature_collection = p.query( - resulttype='results', - properties=[('lon', -12.855022)]) - assert feature_collection.get('type') == 'FeatureCollection' - features = feature_collection.get('features') - assert len(features) == 1 - for feature in features: - assert feature['properties']['lon'] == -12.855022 - - -def test_query_with_skip_geometry(config_parquet): - """Testing query for a valid JSON object with property filter""" - - p = ParquetProvider(config_parquet) - feature_collection = 
p.query(skip_geometry=True) - for feature in feature_collection['features']: - assert feature.get('geometry') is None - - -def test_query_with_datetime(config_parquet): - """Testing query for a valid JSON object with time""" - - p = ParquetProvider(config_parquet) - feature_collection = p.query( - datetime_='2022-05-01T00:00:00Z/2022-05-31T23:59:59Z') - assert feature_collection.get('type') == 'FeatureCollection' - features = feature_collection.get('features') - assert len(features) == 7 - for feature in feature_collection['features']: - time = feature['properties'][config_parquet['time_field']] - assert time.year == 2022 - assert time.month == 5 - - -def test_query_nogeom(config_parquet_nogeom_notime): - """Testing query for a valid JSON object without geometry""" - - p = ParquetProvider(config_parquet_nogeom_notime) - feature_collection = p.query(resulttype='results') - assert feature_collection.get('type') == 'FeatureCollection' - assert len(feature_collection.get('features')) > 0 - for feature in feature_collection['features']: - assert feature.get('geometry') is None - - -def test_query_nocrs(config_parquet_nocrs): - """Testing a parquet provider without CRS""" - - p = ParquetProvider(config_parquet_nocrs) - results = p.get_fields() - assert results['lat']['type'] == 'number' - assert results['lon']['format'] == 'double' - assert results['time']['format'] == 'date-time' +class TestParquetProviderWithNaiveOrMissingGeometry: + """Tests for parquet that do not comply to geoparquet standard""" + + def test_get_fields(self, config_parquet): + """Testing field types""" + + p = ParquetProvider(config_parquet) + assert p.bbox_filterable + assert p.has_geometry + assert not p.has_bbox_column + results = p.get_fields() + assert results['lat']['type'] == 'number' + assert results['lon']['format'] == 'double' + assert results['time']['format'] == 'date-time' + + def test_get(self, config_parquet): + """Testing query for a specific object""" + + p = 
ParquetProvider(config_parquet) + result = p.get('42') + assert result['id'] == '42' + assert result['properties']['lon'] == 4.947447 + + def test_get_not_existing_feature_raise_exception( + self, config_parquet + ): + """Testing query for a not existing object""" + p = ParquetProvider(config_parquet) + with pytest.raises(ProviderItemNotFoundError): + p.get(-1) + + def test_query_hits(self, config_parquet): + """Testing query on entire collection for hits""" + + p = ParquetProvider(config_parquet) + feature_collection = p.query(resulttype='hits') + assert feature_collection.get('type') == 'FeatureCollection' + features = feature_collection.get('features') + assert len(features) == 0 + hits = feature_collection.get('numberMatched') + assert hits is not None + assert hits == 100 + + def test_query_bbox_hits(self, config_parquet): + """Testing query for a valid JSON object with geometry""" + + p = ParquetProvider(config_parquet) + feature_collection = p.query( + bbox=[100, -50, 150, 0], + resulttype='hits') + assert feature_collection.get('type') == 'FeatureCollection' + features = feature_collection.get('features') + assert len(features) == 0 + hits = feature_collection.get('numberMatched') + assert hits is not None + assert hits == 6 + + def test_query_with_limit(self, config_parquet): + """Testing query for a valid JSON object with geometry""" + + p = ParquetProvider(config_parquet) + feature_collection = p.query(limit=2, resulttype='results') + assert feature_collection.get('type') == 'FeatureCollection' + features = feature_collection.get('features') + assert len(features) == 2 + hits = feature_collection.get('numberMatched') + assert hits > 2 + feature = features[0] + properties = feature.get('properties') + assert properties is not None + geometry = feature.get('geometry') + assert geometry is not None + + def test_query_with_offset(self, config_parquet): + """Testing query for a valid JSON object with geometry""" + + p = ParquetProvider(config_parquet) + 
feature_collection = p.query(offset=20, limit=10, resulttype='results') + assert feature_collection.get('type') == 'FeatureCollection' + features = feature_collection.get('features') + assert len(features) == 10 + hits = feature_collection.get('numberMatched') + assert hits > 30 + feature = features[0] + properties = feature.get('properties') + assert properties is not None + assert feature['id'] == '21' + assert properties['lat'] == 66.264988 + geometry = feature.get('geometry') + assert geometry is not None + + def test_query_with_property(self, config_parquet): + """Testing query for a valid JSON object with property filter""" + + p = ParquetProvider(config_parquet) + feature_collection = p.query( + resulttype='results', + properties=[('lon', -12.855022)]) + assert feature_collection.get('type') == 'FeatureCollection' + features = feature_collection.get('features') + assert len(features) == 1 + for feature in features: + assert feature['properties']['lon'] == -12.855022 + + def test_query_with_skip_geometry(self, config_parquet): + """Testing query for a valid JSON object with property filter""" + + p = ParquetProvider(config_parquet) + feature_collection = p.query(skip_geometry=True) + for feature in feature_collection['features']: + assert feature.get('geometry') is None + + def test_query_with_datetime(self, config_parquet): + """Testing query for a valid JSON object with time""" + + p = ParquetProvider(config_parquet) + feature_collection = p.query( + datetime_='2022-05-01T00:00:00Z/2022-05-31T23:59:59Z') + assert feature_collection.get('type') == 'FeatureCollection' + features = feature_collection.get('features') + assert len(features) == 7 + for feature in feature_collection['features']: + time = feature['properties'][config_parquet['time_field']] + assert time.year == 2022 + assert time.month == 5 + + def test_query_nogeom(self, config_parquet_nogeom_notime): + """Testing query for a valid JSON object without geometry""" + + p = 
ParquetProvider(config_parquet_nogeom_notime) + assert not p.has_geometry + assert not p.bbox_filterable + feature_collection = p.query(resulttype='results') + assert feature_collection.get('type') == 'FeatureCollection' + assert len(feature_collection.get('features')) > 0 + for feature in feature_collection['features']: + assert feature.get('geometry') is None + + def test_query_nocrs(self, config_parquet_nocrs): + """Testing a parquet provider without CRS""" + + p = ParquetProvider(config_parquet_nocrs) + assert p.bbox_filterable + assert p.has_geometry + assert not p.has_bbox_column + results = p.get_fields() + assert results['lat']['type'] == 'number' + assert results['lon']['format'] == 'double' + assert results['time']['format'] == 'date-time' + + +class TestParquetProviderWithGeoparquetMetadata: + + def test_file_without_bbox_without_id_specified(self, geoparquet_no_bbox): + + p = ParquetProvider(geoparquet_no_bbox) + assert not p.bbox_filterable + assert not p.has_bbox_column + assert p.id_field is None + results = p.get_fields() + assert results['col']['type'] == 'integer' + + feature_collection = p.query(resulttype='results') + assert feature_collection.get('type') == 'FeatureCollection' + assert feature_collection['features'][0]['geometry']['coordinates'] == ( # noqa + ( + ((30, 10), (40, 40), (20, 40), (10, 20), (30, 10)),) + ) + assert feature_collection['features'][0]['properties']['col'] == 0 + + def test_file_without_bbox_with_id_specified(self, geoparquet_no_bbox): + config = copy(geoparquet_no_bbox) + config['id_field'] = 'col' + + p = ParquetProvider( + config + ) + results = p.get_fields() + assert p.id_field == 'col' + assert results['col']['type'] == 'integer' + + feature_collection = p.query(resulttype='results') + assert feature_collection.get('type') == 'FeatureCollection' + assert feature_collection['features'][0]['geometry']['coordinates'] == ( # noqa + (((30, 10), (40, 40), (20, 40), (10, 20), (30, 10)),) + ) + assert 
feature_collection['features'][0]['properties']['col'] == 0 + assert feature_collection['features'][0]['id'] == '0' + + def test_get_by_id(self, geoparquet_no_bbox): + + config = copy(geoparquet_no_bbox) + config['id_field'] = 'col' + p = ParquetProvider( + config + ) + + feature = p.get('2') + assert feature.get('type') == 'Feature' + assert feature['geometry'] is None + + def test_file_with_bbox(self, geoparquet_with_bbox): + + p = ParquetProvider(geoparquet_with_bbox) + assert p.has_bbox_column + assert p.bbox_filterable + assert p.has_geometry + + hits = p.query(resulttype='hits')['numberMatched'] + assert hits == 679 + + huge_bbox = p.query(bbox=[-90, -90, 90, 90], resulttype='hits')[ + 'numberMatched' + ] + dataset_bounds = p.query(bbox=[-74.1, 40.97, -73.95, 41.1], + resulttype='hits')['numberMatched'] + assert huge_bbox == dataset_bounds diff --git a/tests/provider/test_postgresql_provider.py b/tests/provider/test_postgresql_provider.py index c27660caf..b2e5ae0ae 100644 --- a/tests/provider/test_postgresql_provider.py +++ b/tests/provider/test_postgresql_provider.py @@ -37,7 +37,17 @@ # ================================================================= # Needs to be run like: python3 -m pytest -# See pygeoapi/provider/postgresql.py for instructions on setting up +# Testing local postgis with docker: +''' +docker run --name postgis \ + --rm \ + -p 5432:5432 \ + -e ALLOW_IP_RANGE=0.0.0.0/0 \ + -e POSTGRES_USER=postgres \ + -e POSTGRES_PASS=postgres \ + -e POSTGRES_DBNAME=test \ + -d -t kartoza/postgis +''' # test database in Docker from http import HTTPStatus @@ -69,44 +79,58 @@ PASSWORD = os.environ.get('POSTGRESQL_PASSWORD', 'postgres') -@pytest.fixture() -def config(): - return { +@pytest.fixture(params=['default', 'connection_string']) +def config(request): + config_ = { 'name': 'PostgreSQL', 'type': 'feature', - 'data': {'host': '127.0.0.1', - 'dbname': 'test', - 'user': 'postgres', - 'password': PASSWORD, - 'search_path': ['osm', 'public'] - }, - 
'options': { - 'connect_timeout': 10 - }, + 'options': {'connect_timeout': 10}, 'id_field': 'osm_id', 'table': 'hotosm_bdi_waterways', 'geom_field': 'foo_geom' } + if request.param == 'default': + config_['data'] = { + 'host': '127.0.0.1', + 'dbname': 'test', + 'user': 'postgres', + 'password': PASSWORD, + 'search_path': ['osm', 'public'] + } + elif request.param == 'connection_string': + config_['data'] = ( + f'postgresql://postgres:{PASSWORD}@127.0.0.1:5432/test' + ) + config_['options']['search_path'] = ['osm', 'public'] + return config_ -@pytest.fixture() -def config_types(): - return { + +@pytest.fixture(params=['default', 'connection_string']) +def config_types(request): + config_ = { 'name': 'PostgreSQL', 'type': 'feature', - 'data': {'host': '127.0.0.1', - 'dbname': 'test', - 'user': 'postgres', - 'password': PASSWORD, - 'search_path': ['public'] - }, - 'options': { - 'connect_timeout': 10 - }, + 'options': {'connect_timeout': 10}, 'id_field': 'id', 'table': 'foo', 'geom_field': 'the_geom' } + if request.param == 'default': + config_['data'] = { + 'host': '127.0.0.1', + 'dbname': 'test', + 'user': 'postgres', + 'password': PASSWORD, + 'search_path': ['public', 'osm'] + } + elif request.param == 'connection_string': + config_['data'] = ( + f'postgresql://postgres:{PASSWORD}@127.0.0.1:5432/test' + ) + config_['options']['search_path'] = ['public', 'osm'] + + return config_ @pytest.fixture() @@ -148,14 +172,20 @@ def test_valid_connection_options(config): for key in keys: assert key in ['connect_timeout', 'tcp_user_timeout', 'keepalives', 'keepalives_idle', 'keepalives_count', - 'keepalives_interval'] + 'keepalives_interval', 'search_path'] def test_schema_path_search(config): - config['data']['search_path'] = ['public', 'osm'] + if isinstance(config['data'], dict): + config['data']['search_path'] = ['public', 'osm'] + else: + config['options']['search_path'] = ['public', 'osm'] PostgreSQLProvider(config) - config['data']['search_path'] = ['public', 'notosm'] 
+ if isinstance(config['data'], dict): + config['data']['search_path'] = ['public', 'notosm'] + else: + config['options']['search_path'] = ['public', 'notosm'] with pytest.raises(ProviderQueryError): PostgreSQLProvider(config) @@ -189,13 +219,13 @@ def test_query_materialised_view(config): provider = PostgreSQLProvider(config_materialised_view) # Only ID, width and depth properties should be available - assert set(provider.get_fields().keys()) == {"osm_id", "width", "depth"} + assert set(provider.get_fields().keys()) == {'osm_id', 'width', 'depth'} def test_query_with_property_filter(config): """Test query valid features when filtering by property""" p = PostgreSQLProvider(config) - feature_collection = p.query(properties=[("waterway", "stream")]) + feature_collection = p.query(properties=[('waterway', 'stream')]) features = feature_collection.get('features') stream_features = list( filter(lambda feature: feature['properties']['waterway'] == 'stream', @@ -246,19 +276,19 @@ def test_query_with_config_properties(config): feature = result.get('features')[0] properties = feature.get('properties') for property_name in properties.keys(): - assert property_name in config["properties"] + assert property_name in config['properties'] -@pytest.mark.parametrize("property_filter, expected", [ +@pytest.mark.parametrize('property_filter, expected', [ ([], 14776), - ([("waterway", "stream")], 13930), - ([("waterway", "this does not exist")], 0), + ([('waterway', 'stream')], 13930), + ([('waterway', 'this does not exist')], 0), ]) def test_query_hits_with_property_filter(config, property_filter, expected): """Test query resulttype=hits""" provider = PostgreSQLProvider(config) - results = provider.query(properties=property_filter, resulttype="hits") - assert results["numberMatched"] == expected + results = provider.query(properties=property_filter, resulttype='hits') + assert results['numberMatched'] == expected def test_query_bbox(config): @@ -324,6 +354,19 @@ def 
test_get_simple(config, id_, prev, next_): assert result['next'] == next_ +def test_get_with_injection(config): + """Testing query for injection attack string""" + p = PostgreSQLProvider(config) + feature = p.get('29701937') + assert feature.get('type') == 'Feature' + + with pytest.raises(ProviderItemNotFoundError): + p.get('29701937; DROP TABLE location;') + + with pytest.raises(ProviderItemNotFoundError): + p.get('29701937') + + def test_get_with_config_properties(config): """ Test that get is restricted by properties in the config. @@ -337,7 +380,7 @@ def test_get_with_config_properties(config): result = provider.get(80835483) properties = result.get('properties') for property_name in properties.keys(): - assert property_name in config["properties"] + assert property_name in config['properties'] def test_get_not_existing_item_raise_exception(config): @@ -376,7 +419,7 @@ def test_query_cql(config, cql, expected_ids): assert feature_collection.get('type') == 'FeatureCollection' features = feature_collection.get('features') - ids = [feature["id"] for feature in features] + ids = [feature['id'] for feature in features] assert ids == expected_ids @@ -385,7 +428,7 @@ def test_query_cql_properties_bbox_filters(config): # Arrange properties = [('waterway', 'stream')] bbox = [29, -2.8, 29.2, -2.9] - filterq = parse("osm_id BETWEEN 80800000 AND 80900000") + filterq = parse('osm_id BETWEEN 80800000 AND 80900000') expected_ids = [80835470] # Act @@ -395,7 +438,7 @@ def test_query_cql_properties_bbox_filters(config): bbox=bbox) # Assert - ids = [feature["id"] for feature in feature_collection.get('features')] + ids = [feature['id'] for feature in feature_collection.get('features')] assert ids == expected_ids @@ -457,9 +500,9 @@ def test_instantiation(config): provider = PostgreSQLProvider(config) # Assert - assert provider.name == "PostgreSQL" - assert provider.table == "hotosm_bdi_waterways" - assert provider.id_field == "osm_id" + assert provider.name == 'PostgreSQL' + 
assert provider.table == 'hotosm_bdi_waterways' + assert provider.id_field == 'osm_id' @pytest.mark.parametrize('bad_data, exception, match', [ @@ -484,8 +527,14 @@ def test_instantiation_with_bad_config(config, bad_data, exception, match): def test_instantiation_with_bad_credentials(config): # Arrange - config['data'].update({'user': 'bad_user'}) - match = r'Could not connect to .*bad_user:\*\*\*@' + if isinstance(config['data'], dict): + config['data'].update({'user': 'bad_user'}) + match = r'Could not connect to .*bad_user:\*\*\*@' + + else: + config['data'] = config['data'].replace('postgres:', 'bad_user:') + match = r'Could not connect to .*bad_user:\*\*\*@' + # Make sure we don't use a cached connection in the tests postgresql_provider_module._ENGINE_STORE = {} @@ -505,7 +554,7 @@ def test_engine_and_table_model_stores(config): # Same database connection details, but different table different_table = config.copy() - different_table.update(table="hotosm_bdi_drains") + different_table.update(table='hotosm_bdi_drains') provider2 = PostgreSQLProvider(different_table) assert repr(provider2._engine) == repr(provider0._engine) assert provider2._engine is provider0._engine @@ -515,7 +564,11 @@ def test_engine_and_table_model_stores(config): # and also a different table_model, as two databases may have different # tables with the same name different_host = config.copy() - different_host["data"]["host"] = "localhost" + if isinstance(config['data'], dict): + different_host['data']['host'] = 'localhost' + else: + different_host['data'] = config['data'].replace( + '127.0.0.1', 'localhost') provider3 = PostgreSQLProvider(different_host) assert provider3._engine is not provider0._engine assert provider3.table_model is not provider0.table_model @@ -584,7 +637,7 @@ def test_get_collection_items_postgresql_cql_invalid_filter_language(pg_api_): assert error_response['description'] == 'Invalid filter language' -@pytest.mark.parametrize("bad_cql", [ 
+@pytest.mark.parametrize('bad_cql', [ 'id IN (1, ~)', 'id EATS (1, 2)', # Valid CQL relations only 'id IN (1, 2' # At some point this may return UnexpectedEOF @@ -664,7 +717,7 @@ def test_get_collection_items_postgresql_cql_json_invalid_filter_language(pg_api """ # Arrange # CQL should never be parsed - cql = {"in": {"value": {"property": "id"}, "list": [1, 2]}} + cql = {'in': {'value': {'property': 'id'}, 'list': [1, 2]}} headers = {'CONTENT_TYPE': 'application/query-cql-json'} # Act @@ -681,9 +734,9 @@ def test_get_collection_items_postgresql_cql_json_invalid_filter_language(pg_api assert error_response['description'] == 'Bad CQL JSON' -@pytest.mark.parametrize("bad_cql", [ +@pytest.mark.parametrize('bad_cql', [ # Valid CQL relations only - {"eats": {"value": {"property": "id"}, "list": [1, 2]}}, + {'eats': {'value': {'property': 'id'}, 'list': [1, 2]}}, # At some point this may return UnexpectedEOF '{"in": {"value": {"property": "id"}, "list": [1, 2}}' ]) @@ -939,7 +992,7 @@ def test_provider_count_false_with_resulttype_hits(config): provider = PostgreSQLProvider(config) # Act - results = provider.query(resulttype="hits") + results = provider.query(resulttype='hits') # Assert assert results['numberMatched'] == 14776 diff --git a/tests/provider/test_wms_facade_provider.py b/tests/provider/test_wms_facade_provider.py new file mode 100644 index 000000000..e3ffd8bd9 --- /dev/null +++ b/tests/provider/test_wms_facade_provider.py @@ -0,0 +1,64 @@ +# ================================================================= +# +# Authors: Joana Simoes +# +# +# Copyright (c) 2026 Joana Simoes +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# 
Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +import pytest + +from pygeoapi.provider.wms_facade import WMSFacadeProvider + + +@pytest.fixture() +def config(): + return { + 'name': 'WMSFacade', + 'type': 'map', + 'data': 'https://demo.mapserver.org/cgi-bin/msautotest', + 'options': { + 'layer': 'world_latlong', + 'style': 'default' + }, + 'format': { + 'name': 'png', + 'mimetype': 'image/png' + } + } + + +def test_query(config): + p = WMSFacadeProvider(config) + + results = p.query() + assert len(results) > 0 + + # an invalid CRS should return the default bbox (4326) + results2 = p.query(crs='http://www.opengis.net/def/crs/EPSG/0/1111') + assert len(results2) == len(results) + + results3 = p.query(crs='http://www.opengis.net/def/crs/EPSG/0/3857') + assert len(results3) != len(results) diff --git a/tests/pygeoapi-test-config-failing-collection.yml b/tests/pygeoapi-test-config-failing-collection.yml new file mode 100644 index 000000000..8baddbd90 --- /dev/null +++ b/tests/pygeoapi-test-config-failing-collection.yml @@ -0,0 +1,206 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2026 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of 
this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +server: + bind: + host: 0.0.0.0 + port: 5000 + url: http://localhost:5000/ + mimetype: application/json; charset=UTF-8 + encoding: utf-8 + gzip: false + languages: + # First language is the default language + - en-US + - fr-CA + cors: true + pretty_print: true + limits: + default_items: 10 + max_items: 10 + # templates: /path/to/templates + map: + url: https://tile.openstreetmap.org/{z}/{x}/{y}.png + attribution: '© OpenStreetMap contributors' + manager: + name: TinyDB + connection: /tmp/pygeoapi-test-process-manager.db + output_dir: /tmp + +logging: + level: DEBUG + #logfile: /tmp/pygeoapi.log + +metadata: + identification: + title: + en: pygeoapi default instance + fr: instance par défaut de pygeoapi + description: + en: pygeoapi provides an API to geospatial data + fr: pygeoapi fournit une API aux données géospatiales + keywords: + en: + - geospatial + - data + - api + fr: + - géospatiale + - données + - api + 
keywords_type: theme + terms_of_service: https://creativecommons.org/licenses/by/4.0/ + url: http://example.org + license: + name: CC-BY 4.0 license + url: https://creativecommons.org/licenses/by/4.0/ + provider: + name: Organization Name + url: https://pygeoapi.io + contact: + name: Lastname, Firstname + position: Position Title + address: Mailing Address + city: City + stateorprovince: Administrative Area + postalcode: Zip or Postal Code + country: Country + phone: +xx-xxx-xxx-xxxx + fax: +xx-xxx-xxx-xxxx + email: you@example.org + url: Contact URL + hours: Hours of Service + instructions: During hours of service. Off on weekends. + role: pointOfContact + +resources: + obs: + type: collection + title: + en: Observations + fr: Observations + description: + en: My cool observations + fr: Mes belles observations + keywords: + - observations + - monitoring + links: + - type: text/csv + rel: canonical + title: data + href: https://github.com/mapserver/mapserver/blob/branch-7-0/msautotest/wxs/data/obs.csv + hreflang: en-US + - type: text/csv + rel: alternate + title: data + href: https://raw.githubusercontent.com/mapserver/mapserver/branch-7-0/msautotest/wxs/data/obs.csv + hreflang: en-US + linked-data: + context: + - schema: https://schema.org/ + stn_id: + "@id": schema:identifier + "@type": schema:Text + datetime: + "@type": schema:DateTime + "@id": schema:observationDate + value: + "@type": schema:Number + "@id": schema:QuantitativeValue + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 2000-10-30T18:24:39Z + end: 2007-10-30T08:57:29Z + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian + providers: + - type: feature + name: CSV + data: tests/data/obs.csv + id_field: id + geometry: + x_field: long + y_field: lat + + cmip5: + type: collection + title: CMIP5 sample + description: CMIP5 sample + keywords: + - cmip5 + - climate + extents: + spatial: + bbox: [-150,40,-45,90] + crs: 
http://www.opengis.net/def/crs/OGC/1.3/CRS84 + links: + - type: text/html + rel: canonical + title: information + href: https://open.canada.ca/data/en/dataset/eddd6eaf-34d7-4452-a994-3d928115a68b + hreflang: en-CA + providers: + - type: coverage + name: xarray + data: tests/data/CMIP5_rcp8.5_annual_abs_latlon1x1_PCP_pctl25_P1Y.nc404 + x_field: lon + y_field: lat + time_field: time + format: + name: NetCDF + mimetype: application/x-netcdf + + objects: + type: collection + title: GeoJSON objects + description: GeoJSON geometry types for GeoSparql and Schema Geometry conversion. + keywords: + - shapes + links: + - type: text/html + rel: canonical + title: data source + href: https://en.wikipedia.org/wiki/GeoJSON + hreflang: en-US + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: null + end: null # or empty (either means open ended) + providers: + - type: feature + name: GeoJSON + data: tests/data/items.geojson + id_field: fid + uri_field: uri diff --git a/tests/pygeoapi-test-config-process-metadata.yml b/tests/pygeoapi-test-config-process-metadata.yml new file mode 100644 index 000000000..1d8ddb8fd --- /dev/null +++ b/tests/pygeoapi-test-config-process-metadata.yml @@ -0,0 +1,106 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2026 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the 
Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +server: + bind: + host: 0.0.0.0 + port: 5000 + url: http://localhost:5000/ + mimetype: application/json; charset=UTF-8 + encoding: utf-8 + gzip: false + languages: + # First language is the default language + - en-US + - fr-CA + cors: true + pretty_print: true + limits: + default_items: 10 + max_items: 10 + # templates: /path/to/templates + map: + url: https://tile.openstreetmap.org/{z}/{x}/{y}.png + attribution: '© OpenStreetMap contributors' + manager: + name: TinyDB + connection: /tmp/pygeoapi-test-process-manager.db + output_dir: /tmp + +logging: + level: DEBUG + #logfile: /tmp/pygeoapi.log + +metadata: + identification: + title: + en: pygeoapi default instance + fr: instance par défaut de pygeoapi + description: + en: pygeoapi provides an API to geospatial data + fr: pygeoapi fournit une API aux données géospatiales + keywords: + en: + - geospatial + - data + - api + fr: + - géospatiale + - données + - api + keywords_type: theme + terms_of_service: https://creativecommons.org/licenses/by/4.0/ + url: http://example.org + license: + name: CC-BY 4.0 license + url: https://creativecommons.org/licenses/by/4.0/ + provider: + name: Organization Name + url: https://pygeoapi.io + contact: + name: Lastname, Firstname + position: Position Title + address: Mailing Address + city: City + stateorprovince: Administrative Area + postalcode: Zip or Postal Code + country: Country + phone: +xx-xxx-xxx-xxxx + fax: 
+xx-xxx-xxx-xxxx + email: you@example.org + url: Contact URL + hours: Hours of Service + instructions: During hours of service. Off on weekends. + role: pointOfContact + +resources: + echo: + type: process + processor: + name: Echo diff --git a/tests/pygeoapi-test-config.yml b/tests/pygeoapi-test-config.yml index 13dc63aa9..e64afbf28 100644 --- a/tests/pygeoapi-test-config.yml +++ b/tests/pygeoapi-test-config.yml @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2019 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -142,6 +142,8 @@ resources: begin: 2000-10-30T18:24:39Z end: 2007-10-30T08:57:29Z trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian + resolution: P1D + default: 2000-10-30T18:24:39Z providers: - type: feature name: CSV