diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 018e1c85..af5906e0 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -153,6 +153,10 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s {"datetime", "last_modified", "begin", "begin_utc", "end", "end_utc", "time"} ) +# Services that don't support comma-separated values for multi-value GET +# parameters and require POST with CQL2 JSON instead. +_CQL2_REQUIRED_SERVICES = frozenset({"monitoring-locations"}) + def _parse_datetime(value: str) -> datetime | None: """Parse a single datetime string against the supported formats. @@ -417,9 +421,11 @@ def _construct_api_requests( """ Constructs an HTTP request object for the specified water data API service. - Depending on the input parameters (whether there's lists of multiple - argument values), the function determines whether to use a GET or POST - request, formats parameters appropriately, and sets required headers. + For most services, list parameters are comma-joined and sent as a single + GET request (e.g. ``parameter_code=["00060","00010"]`` becomes + ``parameter_code=00060,00010`` in the URL). For services that do not + support comma-separated values (currently only ``monitoring-locations``), + a POST request with CQL2 JSON is used instead. Parameters ---------- @@ -445,37 +451,38 @@ def _construct_api_requests( Notes ----- - Date/time parameters are automatically formatted to ISO8601. - - If multiple values are provided for non-single parameters, a POST request - is constructed. - - The function sets appropriate headers for GET and POST requests. """ service_url = f"{OGC_API_URL}/collections/{service}/items" - # Identify which parameters should be included in the POST content body - post_params = { - k: v - for k, v in kwargs.items() - if k not in _DATE_RANGE_PARAMS and isinstance(v, (list, tuple)) and len(v) > 1 - } + # Format date/time parameters to ISO8601 first — both routing paths need it. + for key in _DATE_RANGE_PARAMS: + if key in kwargs: + kwargs[key] = _format_api_dates( + kwargs[key], + date=(service == "daily" and key != "last_modified"), + ) - # Everything else goes into the params dictionary for the URL - params = {k: v for k, v in kwargs.items() if k not in post_params} - # Set skipGeometry parameter (API expects camelCase) - params["skipGeometry"] = skip_geometry + if service in _CQL2_REQUIRED_SERVICES: + # POST with CQL2 JSON: multi-value params go in the request body. + post_params = { + k: v + for k, v in kwargs.items() + if k not in _DATE_RANGE_PARAMS + and isinstance(v, (list, tuple)) + and len(v) > 1 + } + params = {k: v for k, v in kwargs.items() if k not in post_params} + else: + # GET with comma-separated values: join list/tuple values into one string. + post_params = {} + params = { + k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v + for k, v in kwargs.items() + } - # If limit is none or greater than 50000, then set limit to max results. Otherwise, - # use the limit + params["skipGeometry"] = skip_geometry params["limit"] = 50000 if limit is None or limit > 50000 else limit - # Indicate if function needs to perform POST conversion - POST = bool(post_params) - - # Convert dates to ISO08601 format - for i in _DATE_RANGE_PARAMS: - if i in params: - dates = service == "daily" and i != "last_modified" - params[i] = _format_api_dates(params[i], date=dates) - # `len()` instead of truthiness: a numpy ndarray would raise on `if bbox:`. if bbox is not None and len(bbox) > 0: params["bbox"] = ",".join(map(str, bbox)) @@ -490,7 +497,7 @@ def _construct_api_requests( headers = _default_headers() - if POST: + if post_params: headers["Content-Type"] = "application/query-cql-json" request = requests.Request( method="POST", diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index e2ba4da8..40ea0615 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -30,6 +30,7 @@ from dataretrieval.waterdata.utils import ( _check_monitoring_location_id, _check_profiles, + _construct_api_requests, _normalize_str_iterable, ) @@ -111,6 +112,69 @@ def test_check_profiles(): _check_profiles(service="results", profile="foo") +def test_construct_api_requests_multivalue_get(): + """Multi-value params use GET with comma-separated values for daily service.""" + req = _construct_api_requests( + "daily", + monitoring_location_id=["USGS-05427718", "USGS-05427719"], + parameter_code=["00060", "00065"], + ) + assert req.method == "GET" + assert "monitoring_location_id=USGS-05427718%2CUSGS-05427719" in req.url + assert "parameter_code=00060%2C00065" in req.url + + +def test_construct_api_requests_monitoring_locations_post(): + """monitoring-locations uses POST+CQL2 for multi-value params (API limitation).""" + req = _construct_api_requests( + "monitoring-locations", + hydrologic_unit_code=["010802050102", "010802050103"], + ) + assert req.method == "POST" + assert req.body is not None + + +def test_construct_api_requests_single_value_stays_get(): + """A length-1 list (or scalar) reaches the URL as a plain value, not a + comma-separated form, so existing single-site callers see no change.""" + req = _construct_api_requests( + "daily", + monitoring_location_id="USGS-05427718", + parameter_code="00060", + ) + assert req.method == "GET" + assert "monitoring_location_id=USGS-05427718" in req.url + assert "%2C" not in req.url # no comma-encoded multi-value + + +def test_construct_api_requests_numeric_list_joins_with_str(): + """Numeric-list params (e.g. ``water_year=[2020, 2021]`` on get_peaks) + must reach the URL as a comma-joined string, not crash on ``",".join`` + of ints. The generator-of-``str(x)`` exists exactly for this case.""" + req = _construct_api_requests( + "peaks", + monitoring_location_id="USGS-05427718", + water_year=[2020, 2021], + ) + assert req.method == "GET" + assert "water_year=2020%2C2021" in req.url + + +def test_construct_api_requests_two_element_date_list_becomes_interval(): + """A two-element date list is interpreted as start/end of an OGC datetime + interval (joined with '/'), NOT as two discrete dates. The OGC `datetime` + parameter does not support "these N specific dates" — that would require + a CQL filter. Verifying so this contract is locked in.""" + req = _construct_api_requests( + "daily", + monitoring_location_id="USGS-05427718", + time=["2024-01-01", "2024-01-31"], + ) + assert req.method == "GET" + # `/` URL-encodes to %2F. Confirms _format_api_dates ran before the join. + assert "time=2024-01-01%2F2024-01-31" in req.url + + def test_samples_results(): """Test results call for proper columns""" df, _ = get_samples(