diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index d718e85e6..fd75e0284 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -292,6 +292,10 @@ default. type: process # REQUIRED (collection, process, or stac-collection) processor: name: HelloWorld # Python path of process definition + # optional, allow for internal HTTP request execution + # if set to True, enables requests to link local ranges and loopback + # default: False + allow_internal_requests: True .. seealso:: diff --git a/pygeoapi/process/base.py b/pygeoapi/process/base.py index 9e2136476..a3dc05279 100644 --- a/pygeoapi/process/base.py +++ b/pygeoapi/process/base.py @@ -3,7 +3,7 @@ # Authors: Tom Kralidis # Francesco Martinelli # -# Copyright (c) 2022 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2024 Francesco Martinelli # # Permission is hereby granted, free of charge, to any person @@ -53,6 +53,8 @@ def __init__(self, processor_def: dict, process_metadata: dict): self.name = processor_def['name'] self.metadata = process_metadata self.supports_outputs = False + self.allow_internal_requests = processor_def.get( + 'allow_internal_requests', False) def set_job_id(self, job_id: str) -> None: """ diff --git a/pygeoapi/process/manager/base.py b/pygeoapi/process/manager/base.py index 632ce4ea2..bb711f354 100644 --- a/pygeoapi/process/manager/base.py +++ b/pygeoapi/process/manager/base.py @@ -46,10 +46,12 @@ BaseProcessor, JobNotFoundError, JobResultNotFoundError, + ProcessorExecuteError, UnknownProcessError, ) from pygeoapi.util import ( get_current_datetime, + is_request_allowed, JobStatus, ProcessExecutionMode, RequestedProcessExecutionMode, @@ -105,7 +107,11 @@ def get_processor(self, process_id: str) -> BaseProcessor: except KeyError as err: raise UnknownProcessError('Invalid process identifier') from err else: - return load_plugin('process', process_conf['processor']) + pp = load_plugin('process', process_conf['processor']) + 
pp.allow_internal_requests = process_conf.get( + 'allow_internal_requests', False) + + return pp def get_jobs(self, status: JobStatus = None, @@ -395,13 +401,13 @@ def execute_process( """ job_id = str(uuid.uuid1()) - processor = self.get_processor(process_id) - processor.set_job_id(job_id) + self.processor = self.get_processor(process_id) + self.processor.set_job_id(job_id) extra_execute_handler_parameters = { 'requested_response': requested_response } - job_control_options = processor.metadata.get( + job_control_options = self.processor.metadata.get( 'jobControlOptions', []) if execution_mode == RequestedProcessExecutionMode.respond_async: @@ -474,7 +480,7 @@ def execute_process( # TODO: handler's response could also be allowed to include more HTTP # headers mime_type, outputs, status = handler( - processor, + self.processor, job_id, data_dict, requested_outputs, @@ -484,26 +490,37 @@ def execute_process( def _send_in_progress_notification(self, subscriber: Optional[Subscriber]): if subscriber and subscriber.in_progress_uri: - response = requests.post(subscriber.in_progress_uri, json={}) - LOGGER.debug( - f'In progress notification response: {response.status_code}' - ) + self.__do_subscriber_request(subscriber.in_progress_uri) def _send_success_notification( self, subscriber: Optional[Subscriber], outputs: Any ): - if subscriber: - response = requests.post(subscriber.success_uri, json=outputs) - LOGGER.debug( - f'Success notification response: {response.status_code}' - ) + if subscriber and subscriber.success_uri: + self.__do_subscriber_request(subscriber.success_uri, outputs) def _send_failed_notification(self, subscriber: Optional[Subscriber]): if subscriber and subscriber.failed_uri: - response = requests.post(subscriber.failed_uri, json={}) - LOGGER.debug( - f'Failed notification response: {response.status_code}' - ) + self.__do_subscriber_request(subscriber.failed_uri) + + def __do_subscriber_request(self, url: str, data: dict = {}) -> None: + """ + Helper 
function to execute a subscriber URL via HTTP POST + + :param url: `str` of URL + :param data: `dict` of request payload + + :returns: `None` + """ + + if not is_request_allowed(url, self.processor.allow_internal_requests): + msg = 'URL not allowed' + LOGGER.error(f'{msg}: {url}') + raise ProcessorExecuteError(msg) + + response = requests.post(url, json=data) + LOGGER.debug( + f'Response: {response.status_code}' + ) def __repr__(self): return f' {self.name}' diff --git a/pygeoapi/provider/filesystem.py b/pygeoapi/provider/filesystem.py index db2a824be..f534c1b73 100644 --- a/pygeoapi/provider/filesystem.py +++ b/pygeoapi/provider/filesystem.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2023 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -34,6 +34,7 @@ import os from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError, + ProviderInvalidQueryError, ProviderNotFoundError) from pygeoapi.util import file_modified_iso8601, get_path_basename, url_join @@ -76,9 +77,15 @@ def get_data_path(self, baseurl, urlpath, dirpath): root_link = None child_links = [] - data_path = os.path.join(self.data, dirpath) + if '..' in dirpath: + msg = 'Invalid path requested' + LOGGER.error(f'{msg}: {dirpath}') + raise ProviderInvalidQueryError(msg) + data_path = self.data + dirpath + LOGGER.debug(f'Data path: {data_path}') + if '/' not in dirpath: # root root_link = baseurl else: diff --git a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml index 19c18f5dc..aaee641ec 100644 --- a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml +++ b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml @@ -682,7 +682,11 @@ properties: For custom built plugins, use the import path (e.g. 
`mypackage.provider.MyProvider`) required: - name - required: + allow_internal_requests: + type: boolean + description: whether to allow internal HTTP requests + default: false + required: - type - processor definitions: diff --git a/pygeoapi/util.py b/pygeoapi/util.py index 1e9118798..30b29f5d8 100644 --- a/pygeoapi/util.py +++ b/pygeoapi/util.py @@ -36,6 +36,7 @@ from decimal import Decimal from enum import Enum from heapq import heappush +import ipaddress import json import logging import mimetypes @@ -43,6 +44,7 @@ import pathlib from pathlib import Path import re +import socket from typing import Any, IO, Union, List, Optional from urllib.parse import urlparse from urllib.request import urlopen @@ -755,3 +757,30 @@ def remove_url_auth(url: str) -> str: u = urlparse(url) auth = f'{u.username}:{u.password}@' return url.replace(auth, '') + + +def is_request_allowed(url: str, allow_internal: bool = False) -> bool: + """ + Test whether an HTTP request is allowed to be executed + + :param url: `str` of URL + :param allow_internal: `bool` of whether internal requests are + allowed (default `False`) + + :returns: `bool` of whether HTTP request execution is allowed + """ + + is_allowed = False + + u = urlparse(url) + + ip = socket.gethostbyname(u.hostname) + + is_private = ipaddress.ip_address(ip).is_private + + if not is_private: + is_allowed = True + if is_private and allow_internal: + is_allowed = True + + return is_allowed diff --git a/tests/other/test_util.py b/tests/other/test_util.py index e17876b39..df9ea2f57 100644 --- a/tests/other/test_util.py +++ b/tests/other/test_util.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -329,3 +329,21 @@ def test_get_choice_from_headers(): 'accept') == 'application/ld+json' assert util.get_choice_from_headers( {'accept-language': 
'en_US', 'accept': '*/*'}, 'accept') == '*/*' + + +@pytest.mark.parametrize('url,allow_internal,result', [ + ['http://127.0.0.1/test', False, False], + ['http://127.0.0.1/test', True, True], + ['http://192.168.0.12/test', False, False], + ['http://192.168.0.12/test', True, True], + ['http://169.254.0.11/test', False, False], + ['http://169.254.0.11/test', True, True], + ['http://0.0.0.0/test', True, True], + ['http://0.0.0.0/test', False, False], + ['http://localhost:5000/test', False, False], + ['http://localhost:5000/test', True, True], + ['https://pygeoapi.io', False, True], + ['https://pygeoapi.io', True, True] +]) +def test_is_request_allowed(url, allow_internal, result): + assert util.is_request_allowed(url, allow_internal) is result diff --git a/tests/provider/test_filesystem_provider.py b/tests/provider/test_filesystem_provider.py index f1cfffcf0..37208acb6 100644 --- a/tests/provider/test_filesystem_provider.py +++ b/tests/provider/test_filesystem_provider.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2021 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -30,6 +30,7 @@ import os import pytest +from pygeoapi.provider.base import ProviderInvalidQueryError from pygeoapi.provider.filesystem import FileSystemProvider THISDIR = os.path.dirname(os.path.realpath(__file__)) @@ -73,3 +74,6 @@ def test_query(config): 'osm_id': 'int' } assert r['assets']['default']['href'] == 'http://example.org/stac/poi_portugal.gpkg' # noqa + + with pytest.raises(ProviderInvalidQueryError): + _ = p.get_data_path(baseurl, urlpath, '../../poi_portugal')