Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 35 additions & 28 deletions dataretrieval/waterdata/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s
{"datetime", "last_modified", "begin", "begin_utc", "end", "end_utc", "time"}
)

# Services that don't support comma-separated values for multi-value GET
# parameters and require POST with CQL2 JSON instead.
_CQL2_REQUIRED_SERVICES = frozenset({"monitoring-locations"})


def _parse_datetime(value: str) -> datetime | None:
"""Parse a single datetime string against the supported formats.
Expand Down Expand Up @@ -417,9 +421,11 @@ def _construct_api_requests(
"""
Constructs an HTTP request object for the specified water data API service.

Depending on the input parameters (whether there's lists of multiple
argument values), the function determines whether to use a GET or POST
request, formats parameters appropriately, and sets required headers.
For most services, list parameters are comma-joined and sent as a single
GET request (e.g. ``parameter_code=["00060","00010"]`` becomes
``parameter_code=00060,00010`` in the URL). For services that do not
support comma-separated values (currently only ``monitoring-locations``),
a POST request with CQL2 JSON is used instead.

Parameters
----------
Expand All @@ -445,37 +451,38 @@ def _construct_api_requests(
Notes
-----
- Date/time parameters are automatically formatted to ISO8601.
- If multiple values are provided for non-single parameters, a POST request
is constructed.
- The function sets appropriate headers for GET and POST requests.
"""
service_url = f"{OGC_API_URL}/collections/{service}/items"

# Identify which parameters should be included in the POST content body
post_params = {
k: v
for k, v in kwargs.items()
if k not in _DATE_RANGE_PARAMS and isinstance(v, (list, tuple)) and len(v) > 1
}
# Format date/time parameters to ISO8601 first — both routing paths need it.
for key in _DATE_RANGE_PARAMS:
if key in kwargs:
kwargs[key] = _format_api_dates(
kwargs[key],
date=(service == "daily" and key != "last_modified"),
)

# Everything else goes into the params dictionary for the URL
params = {k: v for k, v in kwargs.items() if k not in post_params}
# Set skipGeometry parameter (API expects camelCase)
params["skipGeometry"] = skip_geometry
if service in _CQL2_REQUIRED_SERVICES:
# POST with CQL2 JSON: multi-value params go in the request body.
post_params = {
k: v
for k, v in kwargs.items()
if k not in _DATE_RANGE_PARAMS
and isinstance(v, (list, tuple))
and len(v) > 1
}
params = {k: v for k, v in kwargs.items() if k not in post_params}
else:
# GET with comma-separated values: join list/tuple values into one string.
post_params = {}
params = {
k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v
for k, v in kwargs.items()
}

# If limit is none or greater than 50000, then set limit to max results. Otherwise,
# use the limit
params["skipGeometry"] = skip_geometry
params["limit"] = 50000 if limit is None or limit > 50000 else limit

# Indicate if function needs to perform POST conversion
POST = bool(post_params)

# Convert dates to ISO08601 format
for i in _DATE_RANGE_PARAMS:
if i in params:
dates = service == "daily" and i != "last_modified"
params[i] = _format_api_dates(params[i], date=dates)

# `len()` instead of truthiness: a numpy ndarray would raise on `if bbox:`.
if bbox is not None and len(bbox) > 0:
params["bbox"] = ",".join(map(str, bbox))
Expand All @@ -490,7 +497,7 @@ def _construct_api_requests(

headers = _default_headers()

if POST:
if post_params:
headers["Content-Type"] = "application/query-cql-json"
request = requests.Request(
method="POST",
Expand Down
64 changes: 64 additions & 0 deletions tests/waterdata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from dataretrieval.waterdata.utils import (
_check_monitoring_location_id,
_check_profiles,
_construct_api_requests,
_normalize_str_iterable,
)

Expand Down Expand Up @@ -111,6 +112,69 @@ def test_check_profiles():
_check_profiles(service="results", profile="foo")


def test_construct_api_requests_multivalue_get():
"""Multi-value params use GET with comma-separated values for daily service."""
req = _construct_api_requests(
"daily",
monitoring_location_id=["USGS-05427718", "USGS-05427719"],
parameter_code=["00060", "00065"],
)
assert req.method == "GET"
assert "monitoring_location_id=USGS-05427718%2CUSGS-05427719" in req.url
assert "parameter_code=00060%2C00065" in req.url


def test_construct_api_requests_monitoring_locations_post():
"""monitoring-locations uses POST+CQL2 for multi-value params (API limitation)."""
req = _construct_api_requests(
"monitoring-locations",
hydrologic_unit_code=["010802050102", "010802050103"],
)
assert req.method == "POST"
assert req.body is not None


def test_construct_api_requests_single_value_stays_get():
"""A length-1 list (or scalar) reaches the URL as a plain value, not a
comma-separated form, so existing single-site callers see no change."""
req = _construct_api_requests(
"daily",
monitoring_location_id="USGS-05427718",
parameter_code="00060",
)
assert req.method == "GET"
assert "monitoring_location_id=USGS-05427718" in req.url
assert "%2C" not in req.url # no comma-encoded multi-value


def test_construct_api_requests_numeric_list_joins_with_str():
"""Numeric-list params (e.g. ``water_year=[2020, 2021]`` on get_peaks)
must reach the URL as a comma-joined string, not crash on ``",".join``
of ints. The generator-of-``str(x)`` exists exactly for this case."""
req = _construct_api_requests(
"peaks",
monitoring_location_id="USGS-05427718",
water_year=[2020, 2021],
)
assert req.method == "GET"
assert "water_year=2020%2C2021" in req.url


def test_construct_api_requests_two_element_date_list_becomes_interval():
"""A two-element date list is interpreted as start/end of an OGC datetime
interval (joined with '/'), NOT as two discrete dates. The OGC `datetime`
parameter does not support "these N specific dates" — that would require
a CQL filter. Verifying so this contract is locked in."""
req = _construct_api_requests(
"daily",
monitoring_location_id="USGS-05427718",
time=["2024-01-01", "2024-01-31"],
)
assert req.method == "GET"
# `/` URL-encodes to %2F. Confirms _format_api_dates ran before the join.
assert "time=2024-01-01%2F2024-01-31" in req.url


def test_samples_results():
"""Test results call for proper columns"""
df, _ = get_samples(
Expand Down
Loading