From 2b521539bb49a7508b795595ac2edcb3f5bd107a Mon Sep 17 00:00:00 2001 From: eloramirez1356 Date: Mon, 15 Jun 2026 12:40:29 -0500 Subject: [PATCH 1/3] fix: fix STAC server dataset prefix matching --- README.md | 26 +++++++ dclimate_client_py/stac_server.py | 35 ++++++++- tests/test_stac_server_listing.py | 118 ++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b9c12f4..26675e6 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,32 @@ async def main(): }, ) +# ERA5 land datasets +# +# ERA5 and ERA5-Land datasets are separate dataset IDs within the ECMWF ERA5 +# collection. Use list_datasets() or list_available_datasets() to inspect the +# exact names before loading. +async def main_era5_land(): + async with dClimateClient() as dclimate: + # Non-land ERA5 total precipitation + precip, precip_metadata = await dclimate.load_dataset( + dataset="precipitation_total", + collection="era5", + organization="ecmwf", + variant="finalized", + ) + + # ERA5-Land total precipitation + land_precip, land_metadata = await dclimate.load_dataset( + dataset="precipitation_total_land", + collection="era5", + organization="ecmwf", + variant="finalized", + ) + + # ERA5-Land wind datasets follow the same pattern: + # dataset="wind_u_10m_land" or dataset="wind_v_10m_land" + # Custom IPFS endpoints (optional) async def main_custom_ipfs(): async with dClimateClient( diff --git a/dclimate_client_py/stac_server.py b/dclimate_client_py/stac_server.py index 9e74650..cc06c6f 100644 --- a/dclimate_client_py/stac_server.py +++ b/dclimate_client_py/stac_server.py @@ -13,6 +13,33 @@ STAC_SERVER_URL = "https://api.stac.dclimate.net" +def _dataset_id_from_item_id(feature_id: str, collection: str) -> Optional[str]: + prefix = f"{collection}-" + remainder = ( + feature_id[len(prefix) :] if feature_id.startswith(prefix) else feature_id + ) + dataset, _, _ = remainder.partition("-") + return dataset or None + + +def _feature_matches_dataset( + feature: Dict[str, Any], collection: str, dataset: str +) -> bool: + feature_collection = feature.get("collection") + if feature_collection and feature_collection != collection: + return False + + props = feature.get("properties") or {} + dataset_id = props.get("dclimate:dataset_id") + if dataset_id: + return dataset_id == dataset + + feature_id = feature.get("id") + if not isinstance(feature_id, str): + return False + return _dataset_id_from_item_id(feature_id, collection) == dataset + + def resolve_cid_from_stac_server( collection: str, dataset: str, @@ -48,9 +75,11 @@ def resolve_cid_from_stac_server( features = response.json().get("features", []) - # Filter to matching dataset (item ID pattern: {collection}-{dataset}-{variant}) - prefix = f"{collection}-{dataset}" - matches = [f for f in features if f["id"].startswith(prefix)] + # Filter to the exact dataset. A prefix match would conflate datasets such + # as precipitation_total and precipitation_total_land. + matches = [ + f for f in features if _feature_matches_dataset(f, collection, dataset) + ] if not matches: raise ValueError(f"No items found for {collection}/{dataset}") diff --git a/tests/test_stac_server_listing.py b/tests/test_stac_server_listing.py index 8e215ed..6f0f078 100644 --- a/tests/test_stac_server_listing.py +++ b/tests/test_stac_server_listing.py @@ -16,6 +16,7 @@ from dclimate_client_py.stac_server import ( list_available_datasets_from_stac_server, + resolve_cid_from_stac_server, ) @@ -240,6 +241,123 @@ def test_groups_multiple_variants_under_same_dataset(monkeypatch): assert result["ecmwf_era5"]["types"] == ["temperature_2m"] +def test_resolve_cid_uses_exact_dataset_id_for_prefix_collisions(monkeypatch): + """Base ERA5 datasets must not resolve to similarly named *_land datasets.""" + _install_mocks( + monkeypatch, + collections_body=SAMPLE_COLLECTIONS, + search_body={ + "features": [ + { + "id": "ecmwf_era5-precipitation_total_land-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:dataset_id": "precipitation_total_land", + "dclimate:variant": "finalized", + }, + "assets": { + "data": { + "href": "ipfs://bafy-era5-land-precip-finalized" + } + }, + }, + { + "id": "ecmwf_era5-precipitation_total-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:dataset_id": "precipitation_total", + "dclimate:variant": "finalized", + }, + "assets": { + "data": { + "href": "ipfs://bafy-era5-precip-finalized" + } + }, + }, + ] + }, + ) + + cid = resolve_cid_from_stac_server( + "ecmwf_era5", + "precipitation_total", + "finalized", + "https://example.test", + ) + + assert cid == "bafy-era5-precip-finalized" + + +def test_resolve_cid_rejects_only_prefix_dataset_match(monkeypatch): + _install_mocks( + monkeypatch, + collections_body=SAMPLE_COLLECTIONS, + search_body={ + "features": [ + { + "id": "ecmwf_era5-wind_u_10m_land-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:dataset_id": "wind_u_10m_land", + "dclimate:variant": "finalized", + }, + "assets": { + "data": {"href": "ipfs://bafy-era5-land-wind-u"} + }, + } + ] + }, + ) + + with pytest.raises(ValueError, match="No items found"): + resolve_cid_from_stac_server( + "ecmwf_era5", + "wind_u_10m", + "finalized", + "https://example.test", + ) + + +def test_resolve_cid_legacy_id_fallback_is_exact(monkeypatch): + _install_mocks( + monkeypatch, + collections_body=SAMPLE_COLLECTIONS, + search_body={ + "features": [ + { + "id": "ecmwf_era5-temperature_2m_land-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:variant": "finalized", + }, + "assets": { + "data": {"href": "ipfs://bafy-era5-land-t2m"} + }, + }, + { + "id": "ecmwf_era5-temperature_2m-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:variant": "finalized", + }, + "assets": { + "data": {"href": "ipfs://bafy-era5-t2m"} + }, + }, + ] + }, + ) + + cid = resolve_cid_from_stac_server( + "ecmwf_era5", + "temperature_2m", + "finalized", + "https://example.test", + ) + + assert cid == "bafy-era5-t2m" + + def test_collections_endpoint_error_propagates(monkeypatch): def failing_get(url, *args, **kwargs): return _mock_response({}, status=500) From bb25f35a3762c3472e88994d6b650543c91b1e34 Mon Sep 17 00:00:00 2001 From: eloramirez1356 Date: Tue, 16 Jun 2026 08:55:21 -0500 Subject: [PATCH 2/3] style: apply Ruff formatting to STAC server changes --- dclimate_client_py/stac_server.py | 4 +--- tests/test_stac_server_listing.py | 22 +++++----------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/dclimate_client_py/stac_server.py b/dclimate_client_py/stac_server.py index cc06c6f..da164de 100644 --- a/dclimate_client_py/stac_server.py +++ b/dclimate_client_py/stac_server.py @@ -77,9 +77,7 @@ def resolve_cid_from_stac_server( # Filter to the exact dataset. A prefix match would conflate datasets such # as precipitation_total and precipitation_total_land. - matches = [ - f for f in features if _feature_matches_dataset(f, collection, dataset) - ] + matches = [f for f in features if _feature_matches_dataset(f, collection, dataset)] if not matches: raise ValueError(f"No items found for {collection}/{dataset}") diff --git a/tests/test_stac_server_listing.py b/tests/test_stac_server_listing.py index 6f0f078..8b8038f 100644 --- a/tests/test_stac_server_listing.py +++ b/tests/test_stac_server_listing.py @@ -256,9 +256,7 @@ def test_resolve_cid_uses_exact_dataset_id_for_prefix_collisions(monkeypatch): "dclimate:variant": "finalized", }, "assets": { - "data": { - "href": "ipfs://bafy-era5-land-precip-finalized" - } + "data": {"href": "ipfs://bafy-era5-land-precip-finalized"} }, }, { @@ -268,11 +266,7 @@ def test_resolve_cid_uses_exact_dataset_id_for_prefix_collisions(monkeypatch): "dclimate:dataset_id": "precipitation_total", "dclimate:variant": "finalized", }, - "assets": { - "data": { - "href": "ipfs://bafy-era5-precip-finalized" - } - }, + "assets": {"data": {"href": "ipfs://bafy-era5-precip-finalized"}}, }, ] }, @@ -301,9 +295,7 @@ def test_resolve_cid_rejects_only_prefix_dataset_match(monkeypatch): "dclimate:dataset_id": "wind_u_10m_land", "dclimate:variant": "finalized", }, - "assets": { - "data": {"href": "ipfs://bafy-era5-land-wind-u"} - }, + "assets": {"data": {"href": "ipfs://bafy-era5-land-wind-u"}}, } ] }, @@ -330,9 +322,7 @@ def test_resolve_cid_legacy_id_fallback_is_exact(monkeypatch): "properties": { "dclimate:variant": "finalized", }, - "assets": { - "data": {"href": "ipfs://bafy-era5-land-t2m"} - }, + "assets": {"data": {"href": "ipfs://bafy-era5-land-t2m"}}, }, { "id": "ecmwf_era5-temperature_2m-finalized", @@ -340,9 +330,7 @@ def test_resolve_cid_legacy_id_fallback_is_exact(monkeypatch): "properties": { "dclimate:variant": "finalized", }, - "assets": { - "data": {"href": "ipfs://bafy-era5-t2m"} - }, + "assets": {"data": {"href": "ipfs://bafy-era5-t2m"}}, }, ] }, From 7b4e22f655605865ac75afc0a7c97b522eb9e9bc Mon Sep 17 00:00:00 2001 From: TheGreatAlgo <37487508+TheGreatAlgo@users.noreply.github.com> Date: Tue, 16 Jun 2026 13:55:23 -0400 Subject: [PATCH 3/3] fix: update version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1374ac5..dae94d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "pdm.backend" [project] name = "dclimate-client-py" -version = "0.5.8" # Set a static version or handle it in versioning strategy +version = "0.5.9" # Set a static version or handle it in versioning strategy description = "Python client library for accessing dClimate weather and climate data" readme = "README.md" license = {text = "MIT"}