From 7fe353ba4094301e8916aa01c6ff1ebadd10bfa1 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 13 Apr 2026 19:09:20 -0500 Subject: [PATCH 1/2] Use GET with comma-separated values for multi-value waterdata queries The OGC API now supports comma-separated values for fields like monitoring_location_id, parameter_code, and statistic_id, making POST+CQL2 unnecessary for most services. Update _construct_api_requests to join list params with commas and use GET for daily, continuous, latest-daily, latest-continuous, field-measurements, time-series-metadata, and channel-measurements. The monitoring-locations endpoint does not yet support comma-separated GET parameters (returns 400); it retains the POST+CQL2 path. Closes #210. Co-Authored-By: Claude Sonnet 4.6 --- dataretrieval/waterdata/utils.py | 63 ++++++++++++++++++-------------- tests/waterdata_test.py | 23 ++++++++++++ 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 018e1c85..ef578447 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -417,9 +417,11 @@ def _construct_api_requests( """ Constructs an HTTP request object for the specified water data API service. - Depending on the input parameters (whether there's lists of multiple - argument values), the function determines whether to use a GET or POST - request, formats parameters appropriately, and sets required headers. + For most services, list parameters are comma-joined and sent as a single + GET request (e.g. ``parameter_code=["00060","00010"]`` becomes + ``parameter_code=00060,00010`` in the URL). For services that do not + support comma-separated values (currently only ``monitoring-locations``), + a POST request with CQL2 JSON is used instead. Parameters ---------- @@ -445,36 +447,41 @@ def _construct_api_requests( Notes ----- - Date/time parameters are automatically formatted to ISO8601. - - If multiple values are provided for non-single parameters, a POST request - is constructed. - - The function sets appropriate headers for GET and POST requests. """ service_url = f"{OGC_API_URL}/collections/{service}/items" - # Identify which parameters should be included in the POST content body - post_params = { - k: v - for k, v in kwargs.items() - if k not in _DATE_RANGE_PARAMS and isinstance(v, (list, tuple)) and len(v) > 1 - } - - # Everything else goes into the params dictionary for the URL - params = {k: v for k, v in kwargs.items() if k not in post_params} - # Set skipGeometry parameter (API expects camelCase) - params["skipGeometry"] = skip_geometry + # The monitoring-locations endpoint does not support comma-separated values + # for multi-value GET parameters; CQL2 POST is required for that service. + _cql2_required_services = {"monitoring-locations"} - # If limit is none or greater than 50000, then set limit to max results. Otherwise, - # use the limit - params["limit"] = 50000 if limit is None or limit > 50000 else limit + # Format date/time parameters to ISO8601 first — both routing paths need it. + for key in _DATE_RANGE_PARAMS: + if key in kwargs: + kwargs[key] = _format_api_dates( + kwargs[key], + date=(service == "daily" and key != "last_modified"), + ) - # Indicate if function needs to perform POST conversion - POST = bool(post_params) + if service in _cql2_required_services: + # Legacy path: POST with CQL2 for multi-value params + post_params = { + k: v + for k, v in kwargs.items() + if k not in _DATE_RANGE_PARAMS + and isinstance(v, (list, tuple)) + and len(v) > 1 + } + params = {k: v for k, v in kwargs.items() if k not in post_params} + else: + # Join list/tuple values with commas for multi-value GET parameters. + post_params = {} + params = { + k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v + for k, v in kwargs.items() + } - # Convert dates to ISO08601 format - for i in _DATE_RANGE_PARAMS: - if i in params: - dates = service == "daily" and i != "last_modified" - params[i] = _format_api_dates(params[i], date=dates) + params["skipGeometry"] = skip_geometry + params["limit"] = 50000 if limit is None or limit > 50000 else limit # `len()` instead of truthiness: a numpy ndarray would raise on `if bbox:`. if bbox is not None and len(bbox) > 0: @@ -490,7 +497,7 @@ def _construct_api_requests( headers = _default_headers() - if POST: + if post_params: headers["Content-Type"] = "application/query-cql-json" request = requests.Request( method="POST", diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index e2ba4da8..bb61c94f 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -30,6 +30,7 @@ from dataretrieval.waterdata.utils import ( _check_monitoring_location_id, _check_profiles, + _construct_api_requests, _normalize_str_iterable, ) @@ -111,6 +112,28 @@ def test_check_profiles(): _check_profiles(service="results", profile="foo") +def test_construct_api_requests_multivalue_get(): + """Multi-value params use GET with comma-separated values for daily service.""" + req = _construct_api_requests( + "daily", + monitoring_location_id=["USGS-05427718", "USGS-05427719"], + parameter_code=["00060", "00065"], + ) + assert req.method == "GET" + assert "monitoring_location_id=USGS-05427718%2CUSGS-05427719" in req.url + assert "parameter_code=00060%2C00065" in req.url + + +def test_construct_api_requests_monitoring_locations_post(): + """monitoring-locations uses POST+CQL2 for multi-value params (API limitation).""" + req = _construct_api_requests( + "monitoring-locations", + hydrologic_unit_code=["010802050102", "010802050103"], + ) + assert req.method == "POST" + assert req.body is not None + + def test_samples_results(): """Test results call for proper columns""" df, _ = get_samples( From fa7886934e86b86e813fe72d2020c4c19b15c912 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Thu, 14 May 2026 08:43:14 -0500 Subject: [PATCH 2/2] Polish PR 233: module-level constant, fix misleading comment, add 3 unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Style alignment with the rest of `waterdata/utils.py`: - Hoist `_cql2_required_services` from a function-local lowercase `set` to a module-level `_CQL2_REQUIRED_SERVICES = frozenset(...)` to match the convention of `_DATE_RANGE_PARAMS`, `_NO_NORMALIZE_PARAMS`, `_MONITORING_LOCATION_ID_RE`, etc. - Drop the "Legacy path:" prefix in the inline comment. POST/CQL2 is still the current and required path for monitoring-locations — the API team hasn't promised to add comma-GET there. Rephrased the two branches symmetrically ("POST with CQL2 JSON" / "GET with comma- separated values") so neither reads as deprecated. New unit tests: - `test_construct_api_requests_single_value_stays_get` — confirms a scalar `monitoring_location_id="USGS-..."` still produces a clean GET with no `%2C`, i.e. existing single-site callers see no change. - `test_construct_api_requests_numeric_list_joins_with_str` — pins down that `water_year=[2020, 2021]` reaches the URL as `water_year=2020%2C2021`, exercising the `str(x) for x in v` generator that exists specifically to handle non-string list params (without it, `",".join` on a list of ints would TypeError). - `test_construct_api_requests_two_element_date_list_becomes_interval` — pins down the contract that a two-element date list (`time=["2024-01-01", "2024-01-31"]`) is interpreted as start/end of an OGC datetime interval (joined with `/`), NOT as two discrete dates. The OGC `datetime` parameter doesn't support "these N specific dates" — that would require a CQL filter. Test exists so this semantic choice can't be silently changed. --- dataretrieval/waterdata/utils.py | 14 +++++------ tests/waterdata_test.py | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index ef578447..af5906e0 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -153,6 +153,10 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s {"datetime", "last_modified", "begin", "begin_utc", "end", "end_utc", "time"} ) +# Services that don't support comma-separated values for multi-value GET +# parameters and require POST with CQL2 JSON instead. +_CQL2_REQUIRED_SERVICES = frozenset({"monitoring-locations"}) + def _parse_datetime(value: str) -> datetime | None: """Parse a single datetime string against the supported formats. @@ -450,10 +454,6 @@ def _construct_api_requests( """ service_url = f"{OGC_API_URL}/collections/{service}/items" - # The monitoring-locations endpoint does not support comma-separated values - # for multi-value GET parameters; CQL2 POST is required for that service. - _cql2_required_services = {"monitoring-locations"} - # Format date/time parameters to ISO8601 first — both routing paths need it. for key in _DATE_RANGE_PARAMS: if key in kwargs: @@ -462,8 +462,8 @@ def _construct_api_requests( date=(service == "daily" and key != "last_modified"), ) - if service in _cql2_required_services: - # Legacy path: POST with CQL2 for multi-value params + if service in _CQL2_REQUIRED_SERVICES: + # POST with CQL2 JSON: multi-value params go in the request body. post_params = { k: v for k, v in kwargs.items() @@ -473,7 +473,7 @@ def _construct_api_requests( } params = {k: v for k, v in kwargs.items() if k not in post_params} else: - # Join list/tuple values with commas for multi-value GET parameters. + # GET with comma-separated values: join list/tuple values into one string. post_params = {} params = { k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index bb61c94f..40ea0615 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -134,6 +134,47 @@ def test_construct_api_requests_monitoring_locations_post(): assert req.body is not None +def test_construct_api_requests_single_value_stays_get(): + """A length-1 list (or scalar) reaches the URL as a plain value, not a + comma-separated form, so existing single-site callers see no change.""" + req = _construct_api_requests( + "daily", + monitoring_location_id="USGS-05427718", + parameter_code="00060", + ) + assert req.method == "GET" + assert "monitoring_location_id=USGS-05427718" in req.url + assert "%2C" not in req.url # no comma-encoded multi-value + + +def test_construct_api_requests_numeric_list_joins_with_str(): + """Numeric-list params (e.g. ``water_year=[2020, 2021]`` on get_peaks) + must reach the URL as a comma-joined string, not crash on ``",".join`` + of ints. The generator-of-``str(x)`` exists exactly for this case.""" + req = _construct_api_requests( + "peaks", + monitoring_location_id="USGS-05427718", + water_year=[2020, 2021], + ) + assert req.method == "GET" + assert "water_year=2020%2C2021" in req.url + + +def test_construct_api_requests_two_element_date_list_becomes_interval(): + """A two-element date list is interpreted as start/end of an OGC datetime + interval (joined with '/'), NOT as two discrete dates. The OGC `datetime` + parameter does not support "these N specific dates" — that would require + a CQL filter. Verifying so this contract is locked in.""" + req = _construct_api_requests( + "daily", + monitoring_location_id="USGS-05427718", + time=["2024-01-01", "2024-01-31"], + ) + assert req.method == "GET" + # `/` URL-encodes to %2F. Confirms _format_api_dates ran before the join. + assert "time=2024-01-01%2F2024-01-31" in req.url + + def test_samples_results(): """Test results call for proper columns""" df, _ = get_samples(