From 19814402c0fc19952dcf0863897fc586460ae89a Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Tue, 23 Jun 2026 15:05:08 -0500
Subject: [PATCH 1/2] refactor(waterdata)!: snake_case get_samples params (camelCase kept via shim)

Standardize the modern waterdata getter surface on snake_case parameter
names ahead of the breaking 1.2.0 release. `get_samples` and
`get_samples_summary` were the last getters exposing the Samples API's
native camelCase param names; every other getter (the OGC getters)
already uses snake_case.

`get_samples` still sends the Samples API its native camelCase query
parameters: a module-level `_SAMPLES_PARAM_TO_API` dict maps each public
snake_case parameter to its camelCase wire name just before the request
is built (mirroring how the OGC getters map e.g. `skipGeometry`/`bbox`).
Mappings follow `get_monitoring_locations`: `stateFips`->`state_code`,
`countyFips`->`county_code`, `countryFips`->`country_code`,
`boundingBox`->`bbox`,
`monitoringLocationIdentifier`->`monitoring_location_id`; the rest are
snake_cased (`usgsPCode`->`usgs_pcode`,
`hydrologicUnit`->`hydrologic_unit`, etc.). Docstrings now document each
snake_case parameter and note its underlying Samples-API camelCase name.

A new generic, testable `_accept_legacy_kwargs(mapping)` decorator
(dataretrieval/waterdata/utils.py) lets both getters still accept the
old camelCase names, translating them to the new snake_case params and
emitting a DeprecationWarning that names the replacement. Existing
callers (including the demo notebooks, left untouched) keep working with
a warning.

BREAKING CHANGE: `get_samples` / `get_samples_summary` parameters are
now snake_case. The old camelCase names still work but emit a
DeprecationWarning and will be removed in a future release.

Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd --- dataretrieval/waterdata/api.py | 204 +++++++++++++++++++------------ dataretrieval/waterdata/utils.py | 59 ++++++++- tests/waterdata_test.py | 102 +++++++++++++--- 3 files changed, 269 insertions(+), 96 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 8d686f74..e79d08f7 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -36,6 +36,7 @@ _OUTPUT_ID_BY_SERVICE, GEOPANDAS, SAMPLES_URL, + _accept_legacy_kwargs, _as_str_list, _check_profiles, _construct_cql_request, @@ -2259,32 +2260,70 @@ def _get_samples_csv( return df, response +# Map the public snake_case ``get_samples`` parameters to the camelCase query +# parameter names the Samples API expects on the wire. ``characteristic`` is +# already snake_case-compatible (single word) and is sent unchanged. The +# remaining snake_case params are bookkeeping (``service``/``profile``/ +# ``ssl_check``) and never reach the request. 
+_SAMPLES_PARAM_TO_API = { + "activity_media_name": "activityMediaName", + "activity_start_date_lower": "activityStartDateLower", + "activity_start_date_upper": "activityStartDateUpper", + "activity_type_code": "activityTypeCode", + "characteristic_group": "characteristicGroup", + "characteristic_user_supplied": "characteristicUserSupplied", + "bbox": "boundingBox", + "country_code": "countryFips", + "state_code": "stateFips", + "county_code": "countyFips", + "site_type_code": "siteTypeCode", + "site_type_name": "siteTypeName", + "usgs_pcode": "usgsPCode", + "hydrologic_unit": "hydrologicUnit", + "monitoring_location_id": "monitoringLocationIdentifier", + "organization_id": "organizationIdentifier", + "point_location_latitude": "pointLocationLatitude", + "point_location_longitude": "pointLocationLongitude", + "point_location_within_miles": "pointLocationWithinMiles", + "project_id": "projectIdentifier", + "record_identifier_user_supplied": "recordIdentifierUserSupplied", +} + +# Deprecated camelCase keyword names (the Samples-API spelling) accepted for +# backward compatibility, mapped to the new snake_case parameter names. Derived +# from ``_SAMPLES_PARAM_TO_API`` so the two never drift apart. 
+_SAMPLES_LEGACY_KWARGS = { + api_name: py_name for py_name, api_name in _SAMPLES_PARAM_TO_API.items() +} + + +@_accept_legacy_kwargs(_SAMPLES_LEGACY_KWARGS) def get_samples( ssl_check: bool = True, service: SERVICES = "results", profile: PROFILES = "fullphyschem", - activityMediaName: str | Iterable[str] | None = None, - activityStartDateLower: str | None = None, - activityStartDateUpper: str | None = None, - activityTypeCode: str | Iterable[str] | None = None, - characteristicGroup: str | Iterable[str] | None = None, + activity_media_name: str | Iterable[str] | None = None, + activity_start_date_lower: str | None = None, + activity_start_date_upper: str | None = None, + activity_type_code: str | Iterable[str] | None = None, + characteristic_group: str | Iterable[str] | None = None, characteristic: str | Iterable[str] | None = None, - characteristicUserSupplied: str | Iterable[str] | None = None, - boundingBox: list[float] | None = None, - countryFips: str | Iterable[str] | None = None, - stateFips: str | Iterable[str] | None = None, - countyFips: str | Iterable[str] | None = None, - siteTypeCode: str | Iterable[str] | None = None, - siteTypeName: str | Iterable[str] | None = None, - usgsPCode: str | Iterable[str] | None = None, - hydrologicUnit: str | Iterable[str] | None = None, - monitoringLocationIdentifier: str | Iterable[str] | None = None, - organizationIdentifier: str | Iterable[str] | None = None, - pointLocationLatitude: float | None = None, - pointLocationLongitude: float | None = None, - pointLocationWithinMiles: float | None = None, - projectIdentifier: str | Iterable[str] | None = None, - recordIdentifierUserSupplied: str | Iterable[str] | None = None, + characteristic_user_supplied: str | Iterable[str] | None = None, + bbox: list[float] | None = None, + country_code: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = 
None, + site_type_name: str | Iterable[str] | None = None, + usgs_pcode: str | Iterable[str] | None = None, + hydrologic_unit: str | Iterable[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, + organization_id: str | Iterable[str] | None = None, + point_location_latitude: float | None = None, + point_location_longitude: float | None = None, + point_location_within_miles: float | None = None, + project_id: str | Iterable[str] | None = None, + record_identifier_user_supplied: str | Iterable[str] | None = None, ) -> tuple[pd.DataFrame, BaseMetadata]: """Search Samples database for USGS water quality data. This is a wrapper function for the Samples database API. All potential @@ -2320,35 +2359,38 @@ def get_samples( "actgroup", "count" projects - "project", "projectmonitoringlocationweight" organizations - "organization", "count" - activityMediaName : string or iterable of strings, optional + activity_media_name : string or iterable of strings, optional Name or code indicating environmental medium in which sample was taken. Call ``get_codes("samplemedia")`` for the valid inputs. - Example: "Water". - activityStartDateLower : string, optional + Example: "Water". (Samples API: ``activityMediaName``) + activity_start_date_lower : string, optional The start date if using a date range. Takes the format YYYY-MM-DD. The logic is inclusive, i.e. it will also return results that match the date. If left as None, will pull all data on or before - activityStartDateUpper, if populated. - activityStartDateUpper : string, optional + ``activity_start_date_upper``, if populated. + (Samples API: ``activityStartDateLower``) + activity_start_date_upper : string, optional The end date if using a date range. Takes the format YYYY-MM-DD. The logic is inclusive, i.e. it will also return results that match the date. If left as None, will pull all data after - activityStartDateLower up to the most recent available results. 
- activityTypeCode : string or iterable of strings, optional + ``activity_start_date_lower`` up to the most recent available results. + (Samples API: ``activityStartDateUpper``) + activity_type_code : string or iterable of strings, optional Text code that describes type of field activity performed. - Example: "Sample-Routine, regular". - characteristicGroup : string or iterable of strings, optional + Example: "Sample-Routine, regular". (Samples API: ``activityTypeCode``) + characteristic_group : string or iterable of strings, optional Characteristic group is a broad category of characteristics describing one or more results. Call ``get_codes("characteristicgroup")`` for the valid inputs. - Example: "Organics, PFAS" + Example: "Organics, PFAS" (Samples API: ``characteristicGroup``) characteristic : string or iterable of strings, optional Characteristic is a specific category describing one or more results. Call ``get_codes("characteristics")`` for the valid inputs. - Example: "Suspended Sediment Discharge" - characteristicUserSupplied : string or iterable of strings, optional + Example: "Suspended Sediment Discharge" (Samples API: ``characteristic``) + characteristic_user_supplied : string or iterable of strings, optional A user supplied characteristic name describing one or more results. - boundingBox: list of four floats, optional + (Samples API: ``characteristicUserSupplied``) + bbox : list of four floats, optional Filters on the associated monitoring location's point location by checking if it is located within the specified geographic area. The logic is inclusive, i.e. 
it will include locations that overlap @@ -2361,55 +2403,63 @@ def get_samples( * Eastern-most longitude * Northern-most latitude - Example: [-92.8,44.2,-88.9,46.0] - countryFips : string or iterable of strings, optional - Example: "US" (United States) - stateFips : string or iterable of strings, optional + Example: [-92.8,44.2,-88.9,46.0] (Samples API: ``boundingBox``) + country_code : string or iterable of strings, optional + Example: "US" (United States) (Samples API: ``countryFips``) + state_code : string or iterable of strings, optional Call ``get_codes("states")`` for the valid inputs. - Example: "US:15" (United States: Hawaii) - countyFips : string or iterable of strings, optional + Example: "US:15" (United States: Hawaii) (Samples API: ``stateFips``) + county_code : string or iterable of strings, optional Call ``get_codes("counties")`` for the valid inputs. Example: "US:15:001" (United States: Hawaii, Hawaii County) - siteTypeCode : string or iterable of strings, optional + (Samples API: ``countyFips``) + site_type_code : string or iterable of strings, optional An abbreviation for a certain site type. Call ``get_codes("sitetype")`` for the valid inputs. - Example: "GW" (Groundwater site) - siteTypeName : string or iterable of strings, optional + Example: "GW" (Groundwater site) (Samples API: ``siteTypeCode``) + site_type_name : string or iterable of strings, optional A full name for a certain site type. Call ``get_codes("sitetype")`` for the valid inputs. - Example: "Well" - usgsPCode : string or iterable of strings, optional + Example: "Well" (Samples API: ``siteTypeName``) + usgs_pcode : string or iterable of strings, optional 5-digit number used in the US Geological Survey computerized data system, National Water Information System (NWIS), to uniquely identify a specific constituent (the ``parameterCode`` column of ``get_codes("characteristics")``). 
Example: "00060" (Discharge, cubic feet per second) - hydrologicUnit : string or iterable of strings, optional + (Samples API: ``usgsPCode``) + hydrologic_unit : string or iterable of strings, optional Max 12-digit number used to describe a hydrologic unit. - Example: "070900020502" - monitoringLocationIdentifier : string or iterable of strings, optional + Example: "070900020502" (Samples API: ``hydrologicUnit``) + monitoring_location_id : string or iterable of strings, optional A monitoring location identifier has two parts: the agency code and the location number, separated by a dash (-). Example: "USGS-040851385" - organizationIdentifier : string or iterable of strings, optional + (Samples API: ``monitoringLocationIdentifier``) + organization_id : string or iterable of strings, optional Designator used to uniquely identify a specific organization. Currently only accepting the organization "USGS". - pointLocationLatitude : float, optional + (Samples API: ``organizationIdentifier``) + point_location_latitude : float, optional Latitude for a point/radius query (decimal degrees). Must be used - with pointLocationLongitude and pointLocationWithinMiles. - pointLocationLongitude : float, optional + with ``point_location_longitude`` and ``point_location_within_miles``. + (Samples API: ``pointLocationLatitude``) + point_location_longitude : float, optional Longitude for a point/radius query (decimal degrees). Must be used - with pointLocationLatitude and pointLocationWithinMiles. - pointLocationWithinMiles : float, optional + with ``point_location_latitude`` and ``point_location_within_miles``. + (Samples API: ``pointLocationLongitude``) + point_location_within_miles : float, optional Radius for a point/radius query. Must be used with - pointLocationLatitude and pointLocationLongitude - projectIdentifier : string or iterable of strings, optional + ``point_location_latitude`` and ``point_location_longitude``. 
+ (Samples API: ``pointLocationWithinMiles``) + project_id : string or iterable of strings, optional Designator used to uniquely identify a data collection project. Project identifiers are specific to an organization (e.g. USGS). - Example: "ZH003QW03" - recordIdentifierUserSupplied : string or iterable of strings, optional + Example: "ZH003QW03" (Samples API: ``projectIdentifier``) + record_identifier_user_supplied : string or iterable of strings, optional Internal AQS record identifier that returns 1 entry. Only available for the "results" service. + (Samples API: ``recordIdentifierUserSupplied``) Returns ------- @@ -2432,34 +2482,37 @@ def get_samples( >>> # Get PFAS results within a bounding box >>> df, md = dataretrieval.waterdata.get_samples( - ... boundingBox=[-90.2, 42.6, -88.7, 43.2], - ... characteristicGroup="Organics, PFAS", + ... bbox=[-90.2, 42.6, -88.7, 43.2], + ... characteristic_group="Organics, PFAS", ... ) >>> # Get all activities for the Commonwealth of Virginia over a date range >>> df, md = dataretrieval.waterdata.get_samples( ... service="activities", ... profile="sampact", - ... activityStartDateLower="2023-10-01", - ... activityStartDateUpper="2024-01-01", - ... stateFips="US:51", + ... activity_start_date_lower="2023-10-01", + ... activity_start_date_upper="2024-01-01", + ... state_code="US:51", ... ) >>> # Get all pH samples for two sites in Utah >>> df, md = dataretrieval.waterdata.get_samples( - ... monitoringLocationIdentifier=[ + ... monitoring_location_id=[ ... "USGS-393147111462301", ... "USGS-393343111454101", ... ], - ... usgsPCode="00400", + ... usgs_pcode="00400", ... ) """ _check_profiles(service, profile) - # Build argument dictionary, omitting None values - params = _get_args(locals(), exclude={"ssl_check", "profile"}) + # Build argument dictionary, omitting None values. Parameters are the + # public snake_case names here; translate them to the camelCase names the + # Samples API expects just before building the request. 
+ args = _get_args(locals(), exclude={"ssl_check", "profile"}) + params = {_SAMPLES_PARAM_TO_API.get(key, key): value for key, value in args.items()} params.update({"mimeType": "text/csv"}) @@ -2474,8 +2527,9 @@ def get_samples( return df, BaseMetadata(response) +@_accept_legacy_kwargs({"monitoringLocationIdentifier": "monitoring_location_id"}) def get_samples_summary( - monitoringLocationIdentifier: str, + monitoring_location_id: str, ssl_check: bool = True, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get a summary of discrete water-quality samples at a single monitoring location. @@ -2493,13 +2547,13 @@ def get_samples_summary( Parameters ---------- - monitoringLocationIdentifier : string + monitoring_location_id : string A monitoring location identifier has two parts, separated by a dash (``-``): the agency code and the location number. Examples: ``"USGS-040851385"``, ``"AZ014-320821110580701"``, ``"CAX01-15304600"``. Bare location numbers without an agency prefix are accepted by the service but return an empty result, so a prefix - is effectively required. + is effectively required. (Samples API: ``monitoringLocationIdentifier``) ssl_check : bool, optional Check the SSL certificate. Default is True. @@ -2516,18 +2570,18 @@ def get_samples_summary( >>> # What discrete-sample data is available at this site? >>> df, md = dataretrieval.waterdata.get_samples_summary( - ... monitoringLocationIdentifier="USGS-04074950" + ... monitoring_location_id="USGS-04074950" ... ) """ - if not isinstance(monitoringLocationIdentifier, str): + if not isinstance(monitoring_location_id, str): raise TypeError( - "monitoringLocationIdentifier must be a string; the Samples " + "monitoring_location_id must be a string; the Samples " "summary service accepts exactly one monitoring location per " - f"request, got {type(monitoringLocationIdentifier).__name__}." + f"request, got {type(monitoring_location_id).__name__}." 
) - url = f"{SAMPLES_URL}/summary/{quote(monitoringLocationIdentifier, safe='')}" + url = f"{SAMPLES_URL}/summary/{quote(monitoring_location_id, safe='')}" params = {"mimeType": "text/csv"} df, response = _get_samples_csv(url, params, ssl_check) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index ef45bd06..65f9ea2f 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -15,7 +15,10 @@ from __future__ import annotations -from typing import Any, get_args +import functools +import warnings +from collections.abc import Callable, Mapping +from typing import Any, TypeVar, get_args import httpx import pandas as pd @@ -293,6 +296,59 @@ def _check_profiles( ) +_R = TypeVar("_R") + + +def _accept_legacy_kwargs( + mapping: Mapping[str, str], +) -> Callable[[Callable[..., _R]], Callable[..., _R]]: + """Decorator: accept deprecated keyword-argument names, translating them + to their modern equivalents and emitting a :class:`DeprecationWarning`. + + ``mapping`` maps each deprecated keyword name to the new keyword name the + wrapped function expects (e.g. ``{"stateFips": "state_code"}``). When a + caller passes a deprecated name, it is renamed to the new name before the + wrapped function is invoked and a ``DeprecationWarning`` naming the + replacement is emitted. Callers that already use the new names are + unaffected (no warning, no overhead beyond the wrapper call). + + The wrapped function's return type is preserved; its parameter list is + intentionally relaxed (the wrapper accepts the extra deprecated names), + so static checkers won't flag legacy call sites. + + Raises + ------ + TypeError + If both a deprecated name and its modern equivalent are supplied for + the same argument (ambiguous), mirroring Python's "got multiple + values for argument" error. 
+ """ + + def decorator(func: Callable[..., _R]) -> Callable[..., _R]: + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> _R: + for old_name, new_name in mapping.items(): + if old_name not in kwargs: + continue + if new_name in kwargs: + raise TypeError( + f"{func.__name__}() received both {old_name!r} " + f"(deprecated) and {new_name!r}; pass only {new_name!r}." + ) + warnings.warn( + f"The {old_name!r} argument is deprecated and will be " + f"removed in a future release; use {new_name!r} instead.", + DeprecationWarning, + stacklevel=2, + ) + kwargs[new_name] = kwargs.pop(old_name) + return func(*args, **kwargs) + + return wrapper + + return decorator + + __all__ = [ "BASE_URL", "GEOPANDAS", @@ -304,6 +360,7 @@ def _check_profiles( "_EXTRA_ID_COLS", "_NO_NORMALIZE_PARAMS", "_OUTPUT_ID_BY_SERVICE", + "_accept_legacy_kwargs", "_arrange_cols", "_as_str_list", "_check_id_format", diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 92b68618..b7f23617 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -84,10 +84,10 @@ def test_mock_get_samples(httpx_mock): df, md = get_samples( service="results", profile="fullphyschem", - activityMediaName="Water", - activityStartDateLower="2020-01-01", - activityStartDateUpper="2024-12-31", - monitoringLocationIdentifier="USGS-05406500", + activity_media_name="Water", + activity_start_date_lower="2020-01-01", + activity_start_date_upper="2024-12-31", + monitoring_location_id="USGS-05406500", ) assert type(df) is DataFrame # 181 source columns + 6 derived DateTime columns @@ -107,7 +107,7 @@ def test_mock_get_samples_summary(httpx_mock): ) response_file_path = "tests/data/samples_summary.txt" mock_request(httpx_mock, request_url, response_file_path) - df, md = get_samples_summary(monitoringLocationIdentifier="USGS-04183500") + df, md = get_samples_summary(monitoring_location_id="USGS-04183500") assert type(df) is DataFrame expected_columns = { "monitoringLocationIdentifier", @@ 
-130,7 +130,7 @@ def test_mock_get_samples_summary(httpx_mock): def test_get_samples_summary_rejects_list(): """The summary endpoint accepts only one site; a list must raise TypeError.""" with pytest.raises(TypeError, match="exactly one monitoring location"): - get_samples_summary(monitoringLocationIdentifier=["USGS-04183500"]) + get_samples_summary(monitoring_location_id=["USGS-04183500"]) def test_get_samples_raises_typed_error_on_429(httpx_mock): @@ -144,7 +144,7 @@ def test_get_samples_raises_typed_error_on_429(httpx_mock): get_samples( service="results", profile="fullphyschem", - monitoringLocationIdentifier="USGS-05406500", + monitoring_location_id="USGS-05406500", ) @@ -154,7 +154,69 @@ def test_get_samples_summary_raises_typed_error_on_5xx(httpx_mock): httpx_mock.add_response(status_code=503) with pytest.raises(ServiceUnavailable): - get_samples_summary(monitoringLocationIdentifier="USGS-04183500") + get_samples_summary(monitoring_location_id="USGS-04183500") + + +def test_get_samples_legacy_camelcase_kwargs_warn(httpx_mock): + """Legacy camelCase kwargs still work but emit a DeprecationWarning that + names the new snake_case parameter, and produce the same request URL as + the snake_case call.""" + request_url = ( + "https://api.waterdata.usgs.gov/samples-data/results/fullphyschem?" 
+ "activityMediaName=Water&activityStartDateLower=2020-01-01" + "&activityStartDateUpper=2024-12-31&monitoringLocationIdentifier=USGS-05406500&mimeType=text%2Fcsv" + ) + response_file_path = "tests/data/samples_results.txt" + mock_request(httpx_mock, request_url, response_file_path) + with pytest.warns(DeprecationWarning, match="monitoring_location_id"): + df, md = get_samples( + service="results", + profile="fullphyschem", + activityMediaName="Water", + activityStartDateLower="2020-01-01", + activityStartDateUpper="2024-12-31", + monitoringLocationIdentifier="USGS-05406500", + ) + assert type(df) is DataFrame + # The deprecated names map to the same camelCase wire params: same URL. + assert md.url == request_url + + +def test_get_samples_summary_legacy_camelcase_kwarg_warns(httpx_mock): + """The deprecated ``monitoringLocationIdentifier`` keyword still works for + the summary endpoint and warns, naming the new snake_case parameter.""" + request_url = ( + "https://api.waterdata.usgs.gov/samples-data/summary/USGS-04183500" + "?mimeType=text%2Fcsv" + ) + response_file_path = "tests/data/samples_summary.txt" + mock_request(httpx_mock, request_url, response_file_path) + with pytest.warns(DeprecationWarning, match="monitoring_location_id"): + df, md = get_samples_summary(monitoringLocationIdentifier="USGS-04183500") + assert type(df) is DataFrame + assert md.url == request_url + + +def test_get_samples_rejects_both_legacy_and_new_kwarg(): + """Passing both the deprecated camelCase name and its snake_case + replacement is ambiguous and must raise TypeError.""" + with pytest.raises(TypeError, match="monitoring_location_id"): + get_samples( + monitoringLocationIdentifier="USGS-05406500", + monitoring_location_id="USGS-05406500", + ) + + +def test_accept_legacy_kwargs_passthrough_no_warning(recwarn): + """Using only the new names emits no DeprecationWarning.""" + from dataretrieval.waterdata.utils import _accept_legacy_kwargs + + @_accept_legacy_kwargs({"oldName": 
"new_name"}) + def f(new_name=None): + return new_name + + assert f(new_name="x") == "x" + assert not [w for w in recwarn.list if w.category is DeprecationWarning] def test_check_profiles(): @@ -337,9 +399,9 @@ def test_samples_results(): df, _ = get_samples( service="results", profile="narrow", - monitoringLocationIdentifier="USGS-05288705", - activityStartDateLower="2024-10-01", - activityStartDateUpper="2025-04-24", + monitoring_location_id="USGS-05288705", + activity_start_date_lower="2024-10-01", + activity_start_date_upper="2025-04-24", ) assert all( col in df.columns @@ -353,7 +415,7 @@ def test_samples_activity(): df, _ = get_samples( service="activities", profile="sampact", - monitoringLocationIdentifier="USGS-06719505", + monitoring_location_id="USGS-06719505", ) assert len(df) > 0 assert len(df.columns) == 97 @@ -365,10 +427,10 @@ def test_samples_locations(): df, _ = get_samples( service="locations", profile="site", - stateFips="US:55", - activityStartDateLower="2024-10-01", - activityStartDateUpper="2025-04-24", - usgsPCode="00010", + state_code="US:55", + activity_start_date_lower="2024-10-01", + activity_start_date_upper="2025-04-24", + usgs_pcode="00010", ) assert all( col in df.columns for col in ["Location_Identifier", "Location_Latitude"] @@ -381,9 +443,9 @@ def test_samples_projects(): df, _ = get_samples( service="projects", profile="project", - stateFips="US:15", - activityStartDateLower="2024-10-01", - activityStartDateUpper="2025-04-24", + state_code="US:15", + activity_start_date_lower="2024-10-01", + activity_start_date_upper="2025-04-24", ) assert all(col in df.columns for col in ["Org_Identifier", "Project_Identifier"]) assert len(df) > 0 @@ -391,7 +453,7 @@ def test_samples_projects(): def test_samples_organizations(): """Test organizations call for proper columns""" - df, _ = get_samples(service="organizations", profile="count", stateFips="US:01") + df, _ = get_samples(service="organizations", profile="count", state_code="US:01") 
assert len(df) == 1 assert df.size == 3 From 48b2cdb5b9845f92344b49d030ec5142dff59090 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Tue, 23 Jun 2026 15:23:49 -0500 Subject: [PATCH 2/2] test(waterdata): cover backward compat of every legacy camelCase get_samples kwarg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing tests check a couple of deprecated camelCase params end-to-end. Add a single unit test that iterates the whole `_SAMPLES_LEGACY_KWARGS` mapping and asserts, for every legacy name, that it is still accepted, emits a `DeprecationWarning` naming the snake_case replacement, is renamed to that param, and round-trips to the same Samples-API wire name it always used — so every existing camelCase call site keeps producing an identical request. A future param renamed without a legacy alias now fails this test. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd --- tests/waterdata_test.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index b7f23617..adc8b0f4 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -219,6 +219,41 @@ def f(new_name=None): assert not [w for w in recwarn.list if w.category is DeprecationWarning] +def test_every_legacy_camelcase_samples_kwarg_is_backward_compatible(): + """Every deprecated camelCase ``get_samples`` parameter stays backward + compatible: it is still accepted, is renamed to its snake_case replacement + with a ``DeprecationWarning``, and resolves to the exact same Samples-API + wire parameter it always did — so existing camelCase call sites keep + producing identical requests after the rename. 
Covers the whole mapping, so + a future param renamed without a legacy alias fails here.""" + from dataretrieval.waterdata.api import ( + _SAMPLES_LEGACY_KWARGS, + _SAMPLES_PARAM_TO_API, + ) + from dataretrieval.waterdata.utils import _accept_legacy_kwargs + + assert _SAMPLES_LEGACY_KWARGS, "expected a non-empty legacy-kwarg mapping" + + received = {} + + @_accept_legacy_kwargs(_SAMPLES_LEGACY_KWARGS) + def spy(**kwargs): + received.clear() + received.update(kwargs) + + for old_camel, new_snake in _SAMPLES_LEGACY_KWARGS.items(): + # The deprecated camelCase name is accepted, warns, and is translated to + # the snake_case parameter the function now expects ... + with pytest.warns(DeprecationWarning, match=new_snake): + spy(**{old_camel: "sentinel"}) + assert received == {new_snake: "sentinel"}, ( + f"legacy {old_camel!r} did not map to {new_snake!r}" + ) + # ... and that snake_case parameter resolves back to the same camelCase + # wire name, so the request is byte-identical to the pre-rename behavior. + assert _SAMPLES_PARAM_TO_API[new_snake] == old_camel + + def test_check_profiles(): """Tests that correct errors are raised for invalid profiles.""" with pytest.raises(ValueError):