diff --git a/README.md b/README.md index b9c12f4..26675e6 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,32 @@ async def main(): }, ) +# ERA5 land datasets +# +# ERA5 and ERA5-Land datasets are separate dataset IDs within the ECMWF ERA5 +# collection. Use list_datasets() or list_available_datasets() to inspect the +# exact names before loading. +async def main_era5_land(): + async with dClimateClient() as dclimate: + # Non-land ERA5 total precipitation + precip, precip_metadata = await dclimate.load_dataset( + dataset="precipitation_total", + collection="era5", + organization="ecmwf", + variant="finalized", + ) + + # ERA5-Land total precipitation + land_precip, land_metadata = await dclimate.load_dataset( + dataset="precipitation_total_land", + collection="era5", + organization="ecmwf", + variant="finalized", + ) + + # ERA5-Land wind datasets follow the same pattern: + # dataset="wind_u_10m_land" or dataset="wind_v_10m_land" + # Custom IPFS endpoints (optional) async def main_custom_ipfs(): async with dClimateClient( diff --git a/dclimate_client_py/stac_server.py b/dclimate_client_py/stac_server.py index 9e74650..da164de 100644 --- a/dclimate_client_py/stac_server.py +++ b/dclimate_client_py/stac_server.py @@ -13,6 +13,33 @@ STAC_SERVER_URL = "https://api.stac.dclimate.net" +def _dataset_id_from_item_id(feature_id: str, collection: str) -> Optional[str]: + prefix = f"{collection}-" + remainder = ( + feature_id[len(prefix) :] if feature_id.startswith(prefix) else feature_id + ) + dataset, _, _ = remainder.partition("-") + return dataset or None + + +def _feature_matches_dataset( + feature: Dict[str, Any], collection: str, dataset: str +) -> bool: + feature_collection = feature.get("collection") + if feature_collection and feature_collection != collection: + return False + + props = feature.get("properties") or {} + dataset_id = props.get("dclimate:dataset_id") + if dataset_id: + return dataset_id == dataset + + feature_id = feature.get("id") + if not isinstance(feature_id, str): + return False + return _dataset_id_from_item_id(feature_id, collection) == dataset + + def resolve_cid_from_stac_server( collection: str, dataset: str, @@ -48,9 +75,9 @@ def resolve_cid_from_stac_server( features = response.json().get("features", []) - # Filter to matching dataset (item ID pattern: {collection}-{dataset}-{variant}) - prefix = f"{collection}-{dataset}" - matches = [f for f in features if f["id"].startswith(prefix)] + # Filter to the exact dataset. A prefix match would conflate datasets such + # as precipitation_total and precipitation_total_land. + matches = [f for f in features if _feature_matches_dataset(f, collection, dataset)] if not matches: raise ValueError(f"No items found for {collection}/{dataset}") diff --git a/pyproject.toml b/pyproject.toml index 1374ac5..dae94d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "pdm.backend" [project] name = "dclimate-client-py" -version = "0.5.8" # Set a static version or handle it in versioning strategy +version = "0.5.9" # Set a static version or handle it in versioning strategy description = "Python client library for accessing dClimate weather and climate data" readme = "README.md" license = {text = "MIT"} diff --git a/tests/test_stac_server_listing.py b/tests/test_stac_server_listing.py index 8e215ed..8b8038f 100644 --- a/tests/test_stac_server_listing.py +++ b/tests/test_stac_server_listing.py @@ -16,6 +16,7 @@ from dclimate_client_py.stac_server import ( list_available_datasets_from_stac_server, + resolve_cid_from_stac_server, ) @@ -240,6 +241,111 @@ def test_groups_multiple_variants_under_same_dataset(monkeypatch): assert result["ecmwf_era5"]["types"] == ["temperature_2m"] +def test_resolve_cid_uses_exact_dataset_id_for_prefix_collisions(monkeypatch): + """Base ERA5 datasets must not resolve to similarly named *_land datasets.""" + _install_mocks( + monkeypatch, + collections_body=SAMPLE_COLLECTIONS, + search_body={ + "features": [ + { + "id": "ecmwf_era5-precipitation_total_land-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:dataset_id": "precipitation_total_land", + "dclimate:variant": "finalized", + }, + "assets": { + "data": {"href": "ipfs://bafy-era5-land-precip-finalized"} + }, + }, + { + "id": "ecmwf_era5-precipitation_total-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:dataset_id": "precipitation_total", + "dclimate:variant": "finalized", + }, + "assets": {"data": {"href": "ipfs://bafy-era5-precip-finalized"}}, + }, + ] + }, + ) + + cid = resolve_cid_from_stac_server( + "ecmwf_era5", + "precipitation_total", + "finalized", + "https://example.test", + ) + + assert cid == "bafy-era5-precip-finalized" + + +def test_resolve_cid_rejects_only_prefix_dataset_match(monkeypatch): + _install_mocks( + monkeypatch, + collections_body=SAMPLE_COLLECTIONS, + search_body={ + "features": [ + { + "id": "ecmwf_era5-wind_u_10m_land-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:dataset_id": "wind_u_10m_land", + "dclimate:variant": "finalized", + }, + "assets": {"data": {"href": "ipfs://bafy-era5-land-wind-u"}}, + } + ] + }, + ) + + with pytest.raises(ValueError, match="No items found"): + resolve_cid_from_stac_server( + "ecmwf_era5", + "wind_u_10m", + "finalized", + "https://example.test", + ) + + +def test_resolve_cid_legacy_id_fallback_is_exact(monkeypatch): + _install_mocks( + monkeypatch, + collections_body=SAMPLE_COLLECTIONS, + search_body={ + "features": [ + { + "id": "ecmwf_era5-temperature_2m_land-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:variant": "finalized", + }, + "assets": {"data": {"href": "ipfs://bafy-era5-land-t2m"}}, + }, + { + "id": "ecmwf_era5-temperature_2m-finalized", + "collection": "ecmwf_era5", + "properties": { + "dclimate:variant": "finalized", + }, + "assets": {"data": {"href": "ipfs://bafy-era5-t2m"}}, + }, + ] + }, + ) + + cid = resolve_cid_from_stac_server( + "ecmwf_era5", + "temperature_2m", + "finalized", + "https://example.test", + ) + + assert cid == "bafy-era5-t2m" + + def test_collections_endpoint_error_propagates(monkeypatch): def failing_get(url, *args, **kwargs): return _mock_response({}, status=500)