From 2c2250c158e200a331465d704a26b9257951f8bc Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 24 Mar 2026 19:38:01 -0700 Subject: [PATCH 1/9] Add design spec for hypsometric_integral in zonal.py --- .../2026-03-24-hypsometric-integral-design.md | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md diff --git a/docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md b/docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md new file mode 100644 index 00000000..688457c9 --- /dev/null +++ b/docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md @@ -0,0 +1,101 @@ +# Hypsometric Integral — Design Spec + +## Summary + +Add a `hypsometric_integral` function to `xrspatial/zonal.py` that computes +the hypsometric integral (HI) per zone and returns a painted-back raster. + +HI is a geomorphic maturity indicator defined as: + +``` +HI = (mean - min) / (max - min) +``` + +where mean, min, and max are the elevation values within a zone (basin, +catchment, or arbitrary polygon). + +## API + +```python +def hypsometric_integral( + zones, + values, + nodata=np.nan, + name='hypsometric_integral', +) -> xr.DataArray: +``` + +### Parameters + +| Parameter | Type | Description | +|-----------|------|-------------| +| `zones` | `DataArray` or `GeoDataFrame` | 2D zone IDs (integer). GeoDataFrame is rasterized via existing `_maybe_rasterize_zones`. | +| `values` | `DataArray` | 2D elevation raster, same shape as zones. | +| `nodata` | `float` | Fill value for cells outside any zone. Default `np.nan`. | +| `name` | `str` | Name for the output DataArray. Default `'hypsometric_integral'`. | + +### Returns + +`xr.DataArray` — same shape, dims, coords, and attrs as `values`. Each cell +contains the HI of its zone. Cells outside any zone get `nodata`. Zones with +zero elevation range (flat) get `nodata`. 
+ +## Placement + +Lives in `xrspatial/zonal.py` alongside `stats`, `crosstab`, `apply`, etc. + +Rationale: HI requires a zones raster + a values raster (same signature as +other zonal functions) and computes a per-zone aggregate statistic. It is +structurally a zonal operation, not a local neighborhood transform. + +## Backends + +All four backends via `ArrayTypeFunctionMapping`: + +- **numpy**: iterate unique zones, compute min/mean/max per zone, paint back. + Can reuse existing `_sort_and_stride` infrastructure for grouping values by + zone. +- **cupy**: same logic on GPU arrays. Use `cupy.unique`, scatter/gather. +- **dask+numpy**: `map_blocks` or blockwise aggregation. Two-pass: first pass + computes per-zone min/sum/max/count across chunks, second pass reduces and + paints back. +- **dask+cupy**: same as dask+numpy but with cupy chunk functions. + +## Algorithm + +1. Validate inputs (2D, matching shapes). +2. Identify unique zones (excluding NaN / 0 if used as nodata). +3. For each zone `z`: + - Mask: cells where `zones == z` and `values` is finite. + - Compute `min_z`, `mean_z`, `max_z`. + - `hi_z = (mean_z - min_z) / (max_z - min_z)` if `max_z != min_z`, else `nodata`. +4. Paint `hi_z` back into all cells belonging to zone `z`. +5. Fill remaining cells with `nodata`. + +## Accessor + +Expose via `xrspatial.accessor` as: + +```python +da.spatial.hypsometric_integral(zones) +``` + +where `da` is the elevation DataArray. + +## Tests + +- **Hand-crafted case**: zones with known elevation distributions and + pre-computed HI values. +- **Edge cases**: single-cell zones, flat zones (range=0 returns nodata), + NaN cells within a zone (ignored in computation), zones with all-NaN values. +- **Cross-backend parity**: standard `general_checks` pattern comparing + numpy, cupy, dask+numpy, dask+cupy outputs. +- **GeoDataFrame zones input**: verify rasterization path works. + +## Scope + +This is intentionally minimal. 
Future extensions (not in this iteration): +- Hypsometric curve data (normalized area-altitude distribution) +- Per-zone summary table output +- Skewness / kurtosis of the hypsometric distribution +- Integration as a stat option in `zonal.stats()` From 5dc25aa112dbb929e0432f104cbf0bf71a44ca91 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 24 Mar 2026 20:56:58 -0700 Subject: [PATCH 2/9] Update hypsometric_integral spec with review fixes Add column/rasterize_kw params, fix accessor namespace to .xrs, clarify nodata semantics, specify float64 output dtype, add list-of-pairs zones support, note dask chunk alignment strategy. --- .../2026-03-24-hypsometric-integral-design.md | 71 ++++++++++++------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md b/docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md index 688457c9..d6c6350b 100644 --- a/docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md +++ b/docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md @@ -20,7 +20,9 @@ catchment, or arbitrary polygon). def hypsometric_integral( zones, values, - nodata=np.nan, + nodata=0, + column=None, + rasterize_kw=None, name='hypsometric_integral', ) -> xr.DataArray: ``` @@ -29,16 +31,19 @@ def hypsometric_integral( | Parameter | Type | Description | |-----------|------|-------------| -| `zones` | `DataArray` or `GeoDataFrame` | 2D zone IDs (integer). GeoDataFrame is rasterized via existing `_maybe_rasterize_zones`. | -| `values` | `DataArray` | 2D elevation raster, same shape as zones. | -| `nodata` | `float` | Fill value for cells outside any zone. Default `np.nan`. | +| `zones` | `DataArray`, `GeoDataFrame`, or list of `(geometry, value)` pairs | 2D integer zone IDs. Vector inputs are rasterized via `_maybe_rasterize_zones` using `values` as the template grid. | +| `values` | `DataArray` | 2D elevation raster (float), same shape as zones. 
| +| `nodata` | `int`, default `0` | Zone ID that represents "no zone". Cells with this zone ID are excluded from computation and filled with `NaN` in the output. Matches `apply()` convention. Set to `None` to include all zone IDs. | +| `column` | `str` or `None` | Column in a GeoDataFrame containing zone IDs. Required when `zones` is a GeoDataFrame. | +| `rasterize_kw` | `dict` or `None` | Extra keyword arguments passed to `rasterize()` when vector zones are provided. | | `name` | `str` | Name for the output DataArray. Default `'hypsometric_integral'`. | ### Returns -`xr.DataArray` — same shape, dims, coords, and attrs as `values`. Each cell -contains the HI of its zone. Cells outside any zone get `nodata`. Zones with -zero elevation range (flat) get `nodata`. +`xr.DataArray` (dtype `float64`) — same shape, dims, coords, and attrs as +`values`. Each cell contains the HI of its zone. Cells belonging to the +`nodata` zone or with non-finite elevation values get `NaN`. Zones with zero +elevation range (flat) also get `NaN`. ## Placement @@ -52,50 +57,64 @@ structurally a zonal operation, not a local neighborhood transform. All four backends via `ArrayTypeFunctionMapping`: -- **numpy**: iterate unique zones, compute min/mean/max per zone, paint back. - Can reuse existing `_sort_and_stride` infrastructure for grouping values by - zone. -- **cupy**: same logic on GPU arrays. Use `cupy.unique`, scatter/gather. -- **dask+numpy**: `map_blocks` or blockwise aggregation. Two-pass: first pass - computes per-zone min/sum/max/count across chunks, second pass reduces and - paints back. -- **dask+cupy**: same as dask+numpy but with cupy chunk functions. +- **numpy**: use `_sort_and_stride` to group values by zone, compute + min/mean/max per zone, build a zone-to-HI lookup, paint back with + vectorized indexing. +- **cupy**: same logic using `cupy.unique` and device-side scatter/gather. 
+- **dask+numpy**: compute per-chunk partial aggregates (min, max, sum, count + per zone) via `map_blocks`, reduce across chunks to get global per-zone + stats, then `map_blocks` again to paint HI values back using the global + lookup. Zones and values chunks must be aligned (use `validate_arrays`). +- **dask+cupy**: same two-pass structure. Follows the existing pattern where + chunk functions use cupy internally (same as `_stats_dask_cupy`). ## Algorithm -1. Validate inputs (2D, matching shapes). -2. Identify unique zones (excluding NaN / 0 if used as nodata). -3. For each zone `z`: +1. Validate inputs (2D, matching shapes via `validate_arrays`). +2. Rasterize vector zones if needed (`_maybe_rasterize_zones`). +3. Identify unique zone IDs, excluding `nodata` zone and NaN. +4. For each zone `z`: - Mask: cells where `zones == z` and `values` is finite. - Compute `min_z`, `mean_z`, `max_z`. - - `hi_z = (mean_z - min_z) / (max_z - min_z)` if `max_z != min_z`, else `nodata`. -4. Paint `hi_z` back into all cells belonging to zone `z`. -5. Fill remaining cells with `nodata`. + - `hi_z = (mean_z - min_z) / (max_z - min_z)` if `max_z != min_z`, + else `NaN`. +5. Paint `hi_z` back into all cells belonging to zone `z`. +6. Fill remaining cells (nodata zone, non-finite values, flat zones) with + `NaN`. + +### Value nodata handling + +Only non-finite values (`NaN`, `inf`) are excluded from per-zone statistics. +Users with sentinel nodata values (e.g., -9999) should mask their DEM before +calling this function. This matches the convention used by `apply()`. ## Accessor -Expose via `xrspatial.accessor` as: +Expose via the existing `.xrs` accessor: ```python -da.spatial.hypsometric_integral(zones) +elevation.xrs.zonal_hypsometric_integral(zones) ``` -where `da` is the elevation DataArray. +Following the `zonal_` prefix convention used by `zonal_stats`, `zonal_apply`, +and `zonal_crosstab`. 
## Tests - **Hand-crafted case**: zones with known elevation distributions and pre-computed HI values. -- **Edge cases**: single-cell zones, flat zones (range=0 returns nodata), +- **Edge cases**: single-cell zones, flat zones (range=0 returns NaN), NaN cells within a zone (ignored in computation), zones with all-NaN values. - **Cross-backend parity**: standard `general_checks` pattern comparing numpy, cupy, dask+numpy, dask+cupy outputs. -- **GeoDataFrame zones input**: verify rasterization path works. +- **Vector zones input**: verify GeoDataFrame and list-of-pairs rasterization + paths work. ## Scope This is intentionally minimal. Future extensions (not in this iteration): - Hypsometric curve data (normalized area-altitude distribution) - Per-zone summary table output +- `zone_ids` parameter to restrict computation to a subset of zones - Skewness / kurtosis of the hypsometric distribution - Integration as a stat option in `zonal.stats()` From bd3fb9aafb0b4bf415a70c4cca59333a7ba2bd0f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 25 Mar 2026 06:19:41 -0700 Subject: [PATCH 3/9] Add implementation plan for hypsometric_integral --- .../plans/2026-03-24-hypsometric-integral.md | 659 ++++++++++++++++++ 1 file changed, 659 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-24-hypsometric-integral.md diff --git a/docs/superpowers/plans/2026-03-24-hypsometric-integral.md b/docs/superpowers/plans/2026-03-24-hypsometric-integral.md new file mode 100644 index 00000000..df0eacea --- /dev/null +++ b/docs/superpowers/plans/2026-03-24-hypsometric-integral.md @@ -0,0 +1,659 @@ +# Hypsometric Integral Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Add a `hypsometric_integral(zones, values)` function to `xrspatial/zonal.py` that computes HI = (mean - min) / (max - min) per zone and paints the result back into a raster. + +**Architecture:** A new public function in `zonal.py` with four backend implementations (numpy, cupy, dask+numpy, dask+cupy) following the existing `ArrayTypeFunctionMapping` dispatch pattern. The numpy path uses `np.unique` + boolean masking per zone (simpler than `_sort_and_stride` since we only need min/mean/max; `_sort_and_stride` exists for the generic multi-stat case in `stats()`). The cupy path transfers to host and reuses the numpy path (same pattern as `_stats_dask_cupy`; spec's "device-side scatter/gather" is aspirational, host transfer is fine for this use case). The dask path uses module-level `@delayed` functions for per-block aggregation (min/max/sum/count), reduces to global per-zone HI, then `map_blocks` to paint back (preserving chunk structure). Exposed via the `.xrs` accessor and the top-level `xrspatial` namespace. + +**Tech Stack:** numpy, cupy (optional), dask (optional), xarray, pytest + +**Spec:** `docs/superpowers/specs/2026-03-24-hypsometric-integral-design.md` + +--- + +### Task 1: Write failing tests for numpy backend + +**Files:** +- Create: `xrspatial/tests/test_hypsometric_integral.py` + +Every test function needs `@pytest.mark.parametrize("backend", ...)` — there is no global `backend` fixture. This matches the pattern in `test_zonal.py`. + +- [ ] **Step 1: Create test file with hand-crafted test cases** + +```python +# xrspatial/tests/test_hypsometric_integral.py +try: + import dask.array as da +except ImportError: + da = None + +import numpy as np +import pytest +import xarray as xr + +from .general_checks import create_test_raster + + +# --- fixtures --------------------------------------------------------------- + +@pytest.fixture +def hi_zones(backend): + """Two zones (1, 2) plus nodata (0). 
+ + Zone 1: 5 cells — (0,1), (0,2), (0,3), (1,1), (1,2) + Zone 2: 6 cells — (1,3), (2,1), (2,2), (2,3), (3,1), (3,2) + Nodata: 4 cells — column 0 and (3,3) + """ + data = np.array([ + [0, 1, 1, 1], + [0, 1, 1, 2], + [0, 2, 2, 2], + [0, 2, 2, 0], + ], dtype=np.float64) + return create_test_raster(data, backend, dims=['y', 'x'], + attrs={'res': (1.0, 1.0)}, chunks=(2, 2)) + + +@pytest.fixture +def hi_values(backend): + """Elevation values. + + Zone 1 cells: 10, 20, 30, 40, 50 + min=10, max=50, mean=30, HI=(30-10)/(50-10) = 0.5 + + Zone 2 cells: 100, 60, 70, 80, 90, 95 + min=60, max=100, mean=82.5, HI=(82.5-60)/(100-60) = 0.5625 + """ + data = np.array([ + [999., 10., 20., 30.], + [999., 40., 50., 100.], + [999., 60., 70., 80.], + [999., 90., 95., 999.], + ], dtype=np.float64) + return create_test_raster(data, backend, dims=['y', 'x'], + attrs={'res': (1.0, 1.0)}, chunks=(2, 2)) + + +# --- basic ------------------------------------------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_basic(backend, hi_zones, hi_values): + from xrspatial.zonal import hypsometric_integral + + result = hypsometric_integral(hi_zones, hi_values) + + assert isinstance(result, xr.DataArray) + assert result.shape == hi_values.shape + assert result.dims == hi_values.dims + assert result.name == 'hypsometric_integral' + + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 0 (nodata) cells should be NaN + nodata_mask = np.array([ + [True, False, False, False], + [True, False, False, False], + [True, False, False, False], + [True, False, False, True], + ]) + assert np.all(np.isnan(out[nodata_mask])) + + # zone 1: HI = 0.5 + z1_mask = np.array([ + [False, True, True, True], + [False, True, True, False], + [False, False, False, False], + [False, False, False, False], + ]) + np.testing.assert_allclose(out[z1_mask], 0.5, rtol=1e-10) + + # zone 2: HI 
= 0.5625 + z2_mask = np.array([ + [False, False, False, False], + [False, False, False, True], + [False, True, True, True], + [False, True, True, False], + ]) + np.testing.assert_allclose(out[z2_mask], 0.5625, rtol=1e-10) + + +# --- edge cases -------------------------------------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_flat_zone(backend): + """A zone with all identical values has range=0, so HI should be NaN.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 1], [1, 1]], dtype=np.float64), backend, + chunks=(2, 2)) + values = create_test_raster( + np.array([[5.0, 5.0], [5.0, 5.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + assert np.all(np.isnan(out)) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_nan_in_values(backend): + """NaN elevation cells should be excluded from per-zone stats.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 1], [1, 1]], dtype=np.float64), backend, + chunks=(2, 2)) + values = create_test_raster( + np.array([[10.0, np.nan], [20.0, 30.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 1 finite values: 10, 20, 30 -> HI = (20-10)/(30-10) = 0.5 + # the NaN cell should remain NaN in output + assert np.isnan(out[0, 1]) + np.testing.assert_allclose(out[0, 0], 0.5, rtol=1e-10) + np.testing.assert_allclose(out[1, 0], 0.5, rtol=1e-10) + np.testing.assert_allclose(out[1, 1], 0.5, rtol=1e-10) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def 
test_hypsometric_integral_single_cell_zone(backend): + """A zone with a single cell has range=0, so HI=NaN.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 2]], dtype=np.float64), backend, + chunks=(1, 2)) + values = create_test_raster( + np.array([[10.0, 20.0]]), backend, + chunks=(1, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + # single cell -> range=0 -> NaN + assert np.all(np.isnan(out)) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_all_nan_zone(backend): + """A zone whose elevation cells are all NaN should produce NaN.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 1], [2, 2]], dtype=np.float64), backend, + chunks=(2, 2)) + values = create_test_raster( + np.array([[np.nan, np.nan], [10.0, 20.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 1: all NaN -> NaN + assert np.all(np.isnan(out[0, :])) + # zone 2: HI = (15-10)/(20-10) = 0.5 + np.testing.assert_allclose(out[1, :], 0.5, rtol=1e-10) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_nodata_none(backend): + """When nodata=None, all zone IDs are included (even 0).""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[0, 0], [1, 1]], dtype=np.float64), backend, + chunks=(2, 2)) + values = create_test_raster( + np.array([[10.0, 20.0], [30.0, 40.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=None) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 0: HI 
= (15-10)/(20-10) = 0.5 + np.testing.assert_allclose(out[0, :], 0.5, rtol=1e-10) + # zone 1: HI = (35-30)/(40-30) = 0.5 + np.testing.assert_allclose(out[1, :], 0.5, rtol=1e-10) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py -v -x -k "numpy" --no-header 2>&1 | head -30` +Expected: FAIL — `ImportError: cannot import name 'hypsometric_integral' from 'xrspatial.zonal'` + +- [ ] **Step 3: Commit** + +```bash +git add xrspatial/tests/test_hypsometric_integral.py +git commit -m "Add failing tests for hypsometric_integral" +``` + +--- + +### Task 2: Implement numpy backend and public function + +**Files:** +- Modify: `xrspatial/zonal.py` (add `_hi_numpy` and `hypsometric_integral`) + +Note: `zonal.py` does not import `functools.partial`. The dispatcher uses lambdas to bind arguments, following the existing pattern in `stats()`. + +- [ ] **Step 1: Add numpy backend function to zonal.py** + +Add in a new section before the existing `_apply_numpy` helper function (search for `def _apply_numpy`): + +```python +# --------------------------------------------------------------------------- +# Hypsometric integral +# --------------------------------------------------------------------------- + +def _hi_numpy(zones_data, values_data, nodata): + """Numpy backend for hypsometric integral.""" + unique_zones = np.unique(zones_data[np.isfinite(zones_data)]) + if nodata is not None: + unique_zones = unique_zones[unique_zones != nodata] + + out = np.full(values_data.shape, np.nan, dtype=np.float64) + + for z in unique_zones: + mask = (zones_data == z) & np.isfinite(values_data) + if not np.any(mask): + continue + vals = values_data[mask] + mn, mx = vals.min(), vals.max() + if mx == mn: + continue # flat zone -> NaN + hi = (vals.mean() - mn) / (mx - mn) + out[mask] = hi + return out +``` + +- [ ] **Step 2: Add public `hypsometric_integral` function** + +Add immediately after `_hi_numpy`: + +```python +def 
hypsometric_integral( + zones, + values, + nodata=0, + column=None, + rasterize_kw=None, + name='hypsometric_integral', +): + """Hypsometric integral (HI) per zone, painted back to a raster. + + HI measures geomorphic maturity: ``(mean - min) / (max - min)`` + computed over elevations within each zone. Values range from 0 to 1. + + Parameters + ---------- + zones : xr.DataArray, GeoDataFrame, or list of (geometry, value) pairs + Zone definitions. Integer zone IDs. GeoDataFrame and list-of-pairs + inputs are rasterized using *values* as the template grid. + values : xr.DataArray + 2D elevation raster (float), same shape as *zones*. + nodata : int or None, default 0 + Zone ID that means "no zone". Excluded from computation; those + cells get NaN in the output. Set to ``None`` to include all IDs. + column : str, optional + Column in a GeoDataFrame containing zone IDs. + rasterize_kw : dict, optional + Extra keyword arguments for ``rasterize()`` when *zones* is vector. + name : str, default ``'hypsometric_integral'`` + Name for the output DataArray. + + Returns + ------- + xr.DataArray + Float64 raster, same shape/dims/coords as *values*. Each cell + holds the HI of its zone. NaN for nodata zones, non-finite + elevation cells, and flat zones (elevation range = 0). 
+ """ + zones = _maybe_rasterize_zones(zones, values, column=column, + rasterize_kw=rasterize_kw) + validate_arrays(zones, values) + + _nodata = nodata # capture for closures + + mapper = ArrayTypeFunctionMapping( + numpy_func=lambda z, v: _hi_numpy(z, v, _nodata), + cupy_func=not_implemented_func, + dask_func=not_implemented_func, + dask_cupy_func=not_implemented_func, + ) + + out = mapper(zones)(zones.data, values.data) + + return xr.DataArray( + out, + name=name, + dims=values.dims, + coords=values.coords, + attrs=values.attrs, + ) +``` + +- [ ] **Step 3: Run numpy tests to verify they pass** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py -v -k "numpy and not dask and not cupy" --no-header 2>&1 | tail -20` +Expected: all `numpy` backend tests PASS + +- [ ] **Step 4: Commit** + +```bash +git add xrspatial/zonal.py +git commit -m "Add hypsometric_integral with numpy backend" +``` + +--- + +### Task 3: Implement cupy backend + +**Files:** +- Modify: `xrspatial/zonal.py` (add `_hi_cupy`, update dispatcher) + +- [ ] **Step 1: Add cupy backend function** + +Add after `_hi_numpy`: + +```python +def _hi_cupy(zones_data, values_data, nodata): + """CuPy backend for hypsometric integral — transfer to host, compute, return.""" + import cupy as cp + result_np = _hi_numpy(cp.asnumpy(zones_data), cp.asnumpy(values_data), nodata) + return cp.asarray(result_np) +``` + +- [ ] **Step 2: Update dispatcher in `hypsometric_integral`** + +Change `cupy_func=not_implemented_func` to: + +```python +cupy_func=lambda z, v: _hi_cupy(z, v, _nodata), +``` + +- [ ] **Step 3: Run tests (numpy still passes)** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py -v -k "numpy and not dask and not cupy" --no-header 2>&1 | tail -10` +Expected: PASS + +- [ ] **Step 4: Commit** + +```bash +git add xrspatial/zonal.py +git commit -m "Add cupy backend for hypsometric_integral" +``` + +--- + +### Task 4: Implement dask+numpy backend + +**Files:** +- Modify: `xrspatial/zonal.py` (add 
`_hi_dask_numpy`, update dispatcher) + +The dask path: (1) compute per-zone min/max/sum/count as delayed tasks across blocks, (2) reduce to global per-zone HI lookup, (3) `map_blocks` to paint HI back per chunk (preserving chunk structure). + +- [ ] **Step 1: Add module-level delayed helpers and dask+numpy backend function** + +Add three functions after `_hi_cupy`. The `@delayed` helpers are defined at module level to match the existing pattern in `zonal.py` (see `_single_stats_func` at line ~280). + +```python +@delayed +def _hi_block_stats(z_block, v_block, uzones): + """Per-chunk: return (n_zones, 4) array of [min, max, sum, count].""" + result = np.full((len(uzones), 4), np.nan, dtype=np.float64) + result[:, 3] = 0 # count starts at 0 + for i, z in enumerate(uzones): + mask = (z_block == z) & np.isfinite(v_block) + if not np.any(mask): + continue + vals = v_block[mask] + result[i, 0] = vals.min() + result[i, 1] = vals.max() + result[i, 2] = vals.sum() + result[i, 3] = len(vals) + return result + + +@delayed +def _hi_reduce(partials_list, uzones): + """Reduce per-block stats to global per-zone HI lookup dict.""" + stacked = np.stack(partials_list) # (n_blocks, n_zones, 4) + g_min = np.nanmin(stacked[:, :, 0], axis=0) + g_max = np.nanmax(stacked[:, :, 1], axis=0) + g_sum = np.nansum(stacked[:, :, 2], axis=0) + g_count = np.nansum(stacked[:, :, 3], axis=0) + + hi_lookup = {} + for i, z in enumerate(uzones): + if g_count[i] == 0 or g_max[i] == g_min[i]: + hi_lookup[z] = np.nan + else: + mean = g_sum[i] / g_count[i] + hi_lookup[z] = (mean - g_min[i]) / (g_max[i] - g_min[i]) + return hi_lookup + + +def _hi_dask_numpy(zones_data, values_data, nodata): + """Dask+numpy backend for hypsometric integral.""" + # Step 1: find all unique zones across all chunks + unique_zones = _unique_finite_zones(zones_data) + if nodata is not None: + unique_zones = unique_zones[unique_zones != nodata] + + if len(unique_zones) == 0: + return da.full(values_data.shape, np.nan, 
dtype=np.float64, + chunks=values_data.chunks) + + # Step 2: per-block aggregation -> global reduce + zones_blocks = zones_data.to_delayed().ravel() + values_blocks = values_data.to_delayed().ravel() + + partials = [ + _hi_block_stats(zb, vb, unique_zones) + for zb, vb in zip(zones_blocks, values_blocks) + ] + + # Compute the HI lookup eagerly so map_blocks can use it as a parameter. + hi_lookup = dask.compute(_hi_reduce(partials, unique_zones))[0] + + # Step 3: paint back using map_blocks (preserves chunk structure) + def _paint(zones_chunk, values_chunk, hi_map): + out = np.full(zones_chunk.shape, np.nan, dtype=np.float64) + for z, hi_val in hi_map.items(): + mask = (zones_chunk == z) & np.isfinite(values_chunk) + out[mask] = hi_val + return out + + return da.map_blocks( + _paint, zones_data, values_data, hi_map=hi_lookup, + dtype=np.float64, meta=np.array(()), + ) +``` + +Note: `delayed` and `dask` are already imported at module level in `zonal.py`. + +- [ ] **Step 2: Update dispatcher** + +Change `dask_func=not_implemented_func` to: + +```python +dask_func=lambda z, v: _hi_dask_numpy(z, v, _nodata), +``` + +- [ ] **Step 3: Run all tests including dask** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py -v -k "numpy or dask" --no-header 2>&1 | tail -20` +Expected: all numpy and dask+numpy tests PASS + +- [ ] **Step 4: Commit** + +```bash +git add xrspatial/zonal.py +git commit -m "Add dask+numpy backend for hypsometric_integral" +``` + +--- + +### Task 5: Implement dask+cupy backend + +**Files:** +- Modify: `xrspatial/zonal.py` (add `_hi_dask_cupy`, update dispatcher) + +Follows the same pattern as `_stats_dask_cupy`: convert dask+cupy chunks to numpy, delegate to the dask+numpy path. 
+ +- [ ] **Step 1: Add dask+cupy backend function** + +Add after `_hi_dask_numpy`: + +```python +def _hi_dask_cupy(zones_data, values_data, nodata): + """Dask+cupy backend: convert chunks to numpy, delegate.""" + zones_cpu = zones_data.map_blocks( + lambda x: x.get(), dtype=zones_data.dtype, meta=np.array(()), + ) + values_cpu = values_data.map_blocks( + lambda x: x.get(), dtype=values_data.dtype, meta=np.array(()), + ) + return _hi_dask_numpy(zones_cpu, values_cpu, nodata) +``` + +- [ ] **Step 2: Update dispatcher** + +Change `dask_cupy_func=not_implemented_func` to: + +```python +dask_cupy_func=lambda z, v: _hi_dask_cupy(z, v, _nodata), +``` + +- [ ] **Step 3: Run tests (numpy and dask still pass)** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py -v -k "numpy and not cupy" --no-header 2>&1 | tail -10` +Expected: PASS + +- [ ] **Step 4: Commit** + +```bash +git add xrspatial/zonal.py +git commit -m "Add dask+cupy backend for hypsometric_integral" +``` + +--- + +### Task 6: Wire up public exports and accessor + +**Files:** +- Modify: `xrspatial/__init__.py` (add export) +- Modify: `xrspatial/accessor.py` (add accessor method) +- Modify: `xrspatial/tests/test_hypsometric_integral.py` (add accessor test) + +- [ ] **Step 1: Write failing test for accessor** + +Add to the end of `xrspatial/tests/test_hypsometric_integral.py`: + +```python +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_accessor(backend, hi_zones, hi_values): + """Verify the .xrs accessor method works.""" + result = hi_values.xrs.zonal_hypsometric_integral(hi_zones) + assert isinstance(result, xr.DataArray) + assert result.shape == hi_values.shape + + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + z1_mask = np.array([ + [False, True, True, True], + [False, True, True, False], + [False, False, False, False], + [False, False, False, False], + ]) + 
np.testing.assert_allclose(out[z1_mask], 0.5, rtol=1e-10) + + +def test_hypsometric_integral_list_of_pairs_zones(): + """Vector zones via list of (geometry, value) pairs.""" + from shapely.geometry import box + from xrspatial.zonal import hypsometric_integral + + pytest.importorskip("shapely") + pytest.importorskip("rasterio") + + values_data = np.array([ + [10., 20., 30.], + [40., 50., 60.], + [70., 80., 90.], + ], dtype=np.float64) + values = xr.DataArray(values_data, dims=['y', 'x']) + values['y'] = [2.0, 1.0, 0.0] + values['x'] = [0.0, 1.0, 2.0] + values.attrs['res'] = (1.0, 1.0) + + # Zone 1 covers left half, zone 2 covers right half + zones_pairs = [ + (box(-0.5, -0.5, 1.5, 2.5), 1), + (box(1.5, -0.5, 2.5, 2.5), 2), + ] + + result = hypsometric_integral(zones_pairs, values, nodata=0) + assert isinstance(result, xr.DataArray) + assert result.shape == values.shape +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py::test_hypsometric_integral_accessor -v -k "numpy and not dask and not cupy" --no-header 2>&1 | tail -10` +Expected: FAIL — `AttributeError` + +- [ ] **Step 3: Add accessor method to `xrspatial/accessor.py`** + +Add after the `zonal_crosstab` method (search for `def zonal_crosstab`): + +```python + def zonal_hypsometric_integral(self, zones, **kwargs): + from .zonal import hypsometric_integral + return hypsometric_integral(zones, self._obj, **kwargs) +``` + +- [ ] **Step 4: Add top-level export to `xrspatial/__init__.py`** + +Add after the existing zonal imports (search for `zonal_stats`): + +```python +from xrspatial.zonal import hypsometric_integral # noqa +``` + +- [ ] **Step 5: Run all tests** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py -v -k "numpy and not dask and not cupy" --no-header 2>&1 | tail -20` +Expected: all tests PASS + +- [ ] **Step 6: Commit** + +```bash +git add xrspatial/__init__.py xrspatial/accessor.py xrspatial/tests/test_hypsometric_integral.py +git 
commit -m "Wire up hypsometric_integral accessor and public export" +``` + +--- + +### Task 7: Final validation + +**Files:** (none — verification only) + +- [ ] **Step 1: Run the full zonal test suite to check for regressions** + +Run: `pytest xrspatial/tests/test_zonal.py -v -k "numpy and not dask and not cupy" --no-header -q 2>&1 | tail -10` +Expected: existing zonal tests still PASS + +- [ ] **Step 2: Run the hypsometric integral tests one final time** + +Run: `pytest xrspatial/tests/test_hypsometric_integral.py -v --no-header 2>&1` +Expected: all PASS (cupy/dask+cupy tests skip if no GPU) + +- [ ] **Step 3: Verify import works from top level** + +Run: `python -c "from xrspatial import hypsometric_integral; print(hypsometric_integral.__doc__[:60])"` +Expected: prints first 60 chars of docstring without error From e99a83b2f6baa3b154c080d917edcb2ae2723a52 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 25 Mar 2026 07:28:41 -0700 Subject: [PATCH 4/9] Add .claude/worktrees/ to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f085d7ff..11a8c9f2 100644 --- a/.gitignore +++ b/.gitignore @@ -97,3 +97,4 @@ dmypy.json .asv/ xrspatial-examples/ *.zarr/ +.claude/worktrees/ From e8453bbf3920ef16bbb0a0d7b3aadc3238428bb4 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 25 Mar 2026 07:30:50 -0700 Subject: [PATCH 5/9] Add failing tests for hypsometric_integral --- xrspatial/tests/test_hypsometric_integral.py | 196 +++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 xrspatial/tests/test_hypsometric_integral.py diff --git a/xrspatial/tests/test_hypsometric_integral.py b/xrspatial/tests/test_hypsometric_integral.py new file mode 100644 index 00000000..461e929c --- /dev/null +++ b/xrspatial/tests/test_hypsometric_integral.py @@ -0,0 +1,196 @@ +# xrspatial/tests/test_hypsometric_integral.py +try: + import dask.array as da +except ImportError: + da = None + +import numpy as np +import 
pytest +import xarray as xr + +from .general_checks import create_test_raster + + +# --- fixtures --------------------------------------------------------------- + +@pytest.fixture +def hi_zones(backend): + """Two zones (1, 2) plus nodata (0). + + Zone 1: 5 cells — (0,1), (0,2), (0,3), (1,1), (1,2) + Zone 2: 6 cells — (1,3), (2,1), (2,2), (2,3), (3,1), (3,2) + Nodata: 4 cells — column 0 and (3,3) + """ + data = np.array([ + [0, 1, 1, 1], + [0, 1, 1, 2], + [0, 2, 2, 2], + [0, 2, 2, 0], + ], dtype=np.float64) + return create_test_raster(data, backend, dims=['y', 'x'], + attrs={'res': (1.0, 1.0)}, chunks=(2, 2)) + + +@pytest.fixture +def hi_values(backend): + """Elevation values. + + Zone 1 cells: 10, 20, 30, 40, 50 + min=10, max=50, mean=30, HI=(30-10)/(50-10) = 0.5 + + Zone 2 cells: 100, 60, 70, 80, 90, 95 + min=60, max=100, mean=82.5, HI=(82.5-60)/(100-60) = 0.5625 + """ + data = np.array([ + [999., 10., 20., 30.], + [999., 40., 50., 100.], + [999., 60., 70., 80.], + [999., 90., 95., 999.], + ], dtype=np.float64) + return create_test_raster(data, backend, dims=['y', 'x'], + attrs={'res': (1.0, 1.0)}, chunks=(2, 2)) + + +# --- basic ------------------------------------------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_basic(backend, hi_zones, hi_values): + from xrspatial.zonal import hypsometric_integral + + result = hypsometric_integral(hi_zones, hi_values) + + assert isinstance(result, xr.DataArray) + assert result.shape == hi_values.shape + assert result.dims == hi_values.dims + assert result.name == 'hypsometric_integral' + + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 0 (nodata) cells should be NaN + nodata_mask = np.array([ + [True, False, False, False], + [True, False, False, False], + [True, False, False, False], + [True, False, False, True], + ]) + assert np.all(np.isnan(out[nodata_mask])) + + # 
zone 1: HI = 0.5 + z1_mask = np.array([ + [False, True, True, True], + [False, True, True, False], + [False, False, False, False], + [False, False, False, False], + ]) + np.testing.assert_allclose(out[z1_mask], 0.5, rtol=1e-10) + + # zone 2: HI = 0.5625 + z2_mask = np.array([ + [False, False, False, False], + [False, False, False, True], + [False, True, True, True], + [False, True, True, False], + ]) + np.testing.assert_allclose(out[z2_mask], 0.5625, rtol=1e-10) + + +# --- edge cases -------------------------------------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_flat_zone(backend): + """A zone with all identical values has range=0, so HI should be NaN.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 1], [1, 1]], dtype=np.float64), backend, + chunks=(2, 2)) + values = create_test_raster( + np.array([[5.0, 5.0], [5.0, 5.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + assert np.all(np.isnan(out)) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_nan_in_values(backend): + """NaN elevation cells should be excluded from per-zone stats.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 1], [1, 1]], dtype=np.float64), backend, + chunks=(2, 2)) + values = create_test_raster( + np.array([[10.0, np.nan], [20.0, 30.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 1 finite values: 10, 20, 30 -> HI = (20-10)/(30-10) = 0.5 + # the NaN cell should remain NaN in output + assert np.isnan(out[0, 1]) + 
np.testing.assert_allclose(out[0, 0], 0.5, rtol=1e-10) + np.testing.assert_allclose(out[1, 0], 0.5, rtol=1e-10) + np.testing.assert_allclose(out[1, 1], 0.5, rtol=1e-10) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_single_cell_zone(backend): + """A zone with a single cell has range=0, so HI=NaN.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 2]], dtype=np.float64), backend, + chunks=(1, 2)) + values = create_test_raster( + np.array([[10.0, 20.0]]), backend, + chunks=(1, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + # single cell -> range=0 -> NaN + assert np.all(np.isnan(out)) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_all_nan_zone(backend): + """A zone whose elevation cells are all NaN should produce NaN.""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[1, 1], [2, 2]], dtype=np.float64), backend, + chunks=(2, 2)) + values = create_test_raster( + np.array([[np.nan, np.nan], [10.0, 20.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=0) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 1: all NaN -> NaN + assert np.all(np.isnan(out[0, :])) + # zone 2: HI = (15-10)/(20-10) = 0.5 + np.testing.assert_allclose(out[1, :], 0.5, rtol=1e-10) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_nodata_none(backend): + """When nodata=None, all zone IDs are included (even 0).""" + from xrspatial.zonal import hypsometric_integral + + zones = create_test_raster( + np.array([[0, 0], [1, 1]], dtype=np.float64), backend, + chunks=(2, 2)) + values = 
create_test_raster( + np.array([[10.0, 20.0], [30.0, 40.0]]), backend, + chunks=(2, 2)) + + result = hypsometric_integral(zones, values, nodata=None) + out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + + # zone 0: HI = (15-10)/(20-10) = 0.5 + np.testing.assert_allclose(out[0, :], 0.5, rtol=1e-10) + # zone 1: HI = (35-30)/(40-30) = 0.5 + np.testing.assert_allclose(out[1, :], 0.5, rtol=1e-10) From 69633cc19dec93307006b4cb474b1c7ba448dac6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 25 Mar 2026 07:51:29 -0700 Subject: [PATCH 6/9] Add hypsometric_integral with numpy backend --- xrspatial/zonal.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index eaefaabd..e35a0fb3 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -1331,6 +1331,99 @@ def crosstab( return crosstab_df +# --------------------------------------------------------------------------- +# Hypsometric integral +# --------------------------------------------------------------------------- + +def _hi_numpy(zones_data, values_data, nodata): + """Numpy backend for hypsometric integral.""" + unique_zones = np.unique(zones_data[np.isfinite(zones_data)]) + if nodata is not None: + unique_zones = unique_zones[unique_zones != nodata] + + out = np.full(values_data.shape, np.nan, dtype=np.float64) + + for z in unique_zones: + mask = (zones_data == z) & np.isfinite(values_data) + if not np.any(mask): + continue + vals = values_data[mask] + mn, mx = vals.min(), vals.max() + if mx == mn: + continue # flat zone -> NaN + hi = (vals.mean() - mn) / (mx - mn) + out[mask] = hi + return out + + +def _hi_cupy(zones_data, values_data, nodata): + """CuPy backend for hypsometric integral — transfer to host, compute, return.""" + import cupy as cp + result_np = _hi_numpy(cp.asnumpy(zones_data), cp.asnumpy(values_data), nodata) + return cp.asarray(result_np) + + +def 
hypsometric_integral( + zones, + values, + nodata=0, + column=None, + rasterize_kw=None, + name='hypsometric_integral', +): + """Hypsometric integral (HI) per zone, painted back to a raster. + + HI measures geomorphic maturity: ``(mean - min) / (max - min)`` + computed over elevations within each zone. Values range from 0 to 1. + + Parameters + ---------- + zones : xr.DataArray, GeoDataFrame, or list of (geometry, value) pairs + Zone definitions. Integer zone IDs. GeoDataFrame and list-of-pairs + inputs are rasterized using *values* as the template grid. + values : xr.DataArray + 2D elevation raster (float), same shape as *zones*. + nodata : int or None, default 0 + Zone ID that means "no zone". Excluded from computation; those + cells get NaN in the output. Set to ``None`` to include all IDs. + column : str, optional + Column in a GeoDataFrame containing zone IDs. + rasterize_kw : dict, optional + Extra keyword arguments for ``rasterize()`` when *zones* is vector. + name : str, default ``'hypsometric_integral'`` + Name for the output DataArray. + + Returns + ------- + xr.DataArray + Float64 raster, same shape/dims/coords as *values*. Each cell + holds the HI of its zone. NaN for nodata zones, non-finite + elevation cells, and flat zones (elevation range = 0). 
+ """ + zones = _maybe_rasterize_zones(zones, values, column=column, + rasterize_kw=rasterize_kw) + validate_arrays(zones, values) + + _nodata = nodata # capture for closures + + mapper = ArrayTypeFunctionMapping( + numpy_func=lambda z, v: _hi_numpy(z, v, _nodata), + cupy_func=lambda z, v: _hi_cupy(z, v, _nodata), + dask_func=not_implemented_func, + dask_cupy_func=not_implemented_func, + ) + + out = mapper(zones)(zones.data, values.data) + + return xr.DataArray( + out, + name=name, + dims=values.dims, + coords=values.coords, + attrs=values.attrs, + ) + + def _apply_numpy(zones_data, values_data, func, nodata): out = values_data.copy() if nodata is not None: From 56d2b0a47d05bd6d35ff9f51b8e248c5748d4095 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 25 Mar 2026 11:31:50 -0700 Subject: [PATCH 7/9] Add dask backends for hypsometric_integral Wire up _hi_dask_numpy (per-block aggregation + reduce + map_blocks paint-back) and _hi_dask_cupy (host transfer delegate) backends. --- xrspatial/zonal.py | 88 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index e35a0fb3..da2ebe21 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -1363,6 +1363,90 @@ def _hi_cupy(zones_data, values_data, nodata): return cp.asarray(result_np) +@delayed +def _hi_block_stats(z_block, v_block, uzones): + """Per-chunk: return (n_zones, 4) array of [min, max, sum, count].""" + result = np.full((len(uzones), 4), np.nan, dtype=np.float64) + result[:, 3] = 0 # count starts at 0 + for i, z in enumerate(uzones): + mask = (z_block == z) & np.isfinite(v_block) + if not np.any(mask): + continue + vals = v_block[mask] + result[i, 0] = vals.min() + result[i, 1] = vals.max() + result[i, 2] = vals.sum() + result[i, 3] = len(vals) + return result + + +@delayed +def _hi_reduce(partials_list, uzones): + """Reduce per-block stats to global per-zone HI lookup dict.""" + stacked = 
np.stack(partials_list) # (n_blocks, n_zones, 4) + g_min = np.nanmin(stacked[:, :, 0], axis=0) + g_max = np.nanmax(stacked[:, :, 1], axis=0) + g_sum = np.nansum(stacked[:, :, 2], axis=0) + g_count = np.nansum(stacked[:, :, 3], axis=0) + + hi_lookup = {} + for i, z in enumerate(uzones): + if g_count[i] == 0 or g_max[i] == g_min[i]: + hi_lookup[z] = np.nan + else: + mean = g_sum[i] / g_count[i] + hi_lookup[z] = (mean - g_min[i]) / (g_max[i] - g_min[i]) + return hi_lookup + + +def _hi_dask_numpy(zones_data, values_data, nodata): + """Dask+numpy backend for hypsometric integral.""" + # Step 1: find all unique zones across all chunks + unique_zones = _unique_finite_zones(zones_data) + if nodata is not None: + unique_zones = unique_zones[unique_zones != nodata] + + if len(unique_zones) == 0: + return da.full(values_data.shape, np.nan, dtype=np.float64, + chunks=values_data.chunks) + + # Step 2: per-block aggregation -> global reduce + zones_blocks = zones_data.to_delayed().ravel() + values_blocks = values_data.to_delayed().ravel() + + partials = [ + _hi_block_stats(zb, vb, unique_zones) + for zb, vb in zip(zones_blocks, values_blocks) + ] + + # Compute the HI lookup eagerly so map_blocks can use it as a parameter. 
+ hi_lookup = dask.compute(_hi_reduce(partials, unique_zones))[0] + + # Step 3: paint back using map_blocks (preserves chunk structure) + def _paint(zones_chunk, values_chunk, hi_map): + out = np.full(zones_chunk.shape, np.nan, dtype=np.float64) + for z, hi_val in hi_map.items(): + mask = (zones_chunk == z) & np.isfinite(values_chunk) + out[mask] = hi_val + return out + + return da.map_blocks( + _paint, zones_data, values_data, hi_map=hi_lookup, + dtype=np.float64, meta=np.array(()), + ) + + +def _hi_dask_cupy(zones_data, values_data, nodata): + """Dask+cupy backend: convert chunks to numpy, delegate.""" + zones_cpu = zones_data.map_blocks( + lambda x: x.get(), dtype=zones_data.dtype, meta=np.array(()), + ) + values_cpu = values_data.map_blocks( + lambda x: x.get(), dtype=values_data.dtype, meta=np.array(()), + ) + return _hi_dask_numpy(zones_cpu, values_cpu, nodata) + + def hypsometric_integral( zones, values, @@ -1409,8 +1493,8 @@ def hypsometric_integral( mapper = ArrayTypeFunctionMapping( numpy_func=lambda z, v: _hi_numpy(z, v, _nodata), cupy_func=lambda z, v: _hi_cupy(z, v, _nodata), - dask_func=not_implemented_func, - dask_cupy_func=not_implemented_func, + dask_func=lambda z, v: _hi_dask_numpy(z, v, _nodata), + dask_cupy_func=lambda z, v: _hi_dask_cupy(z, v, _nodata), ) out = mapper(zones)(zones.data, values.data) From cd4f3e44bce43ce97692c40ce982350edde03d9b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 25 Mar 2026 11:38:09 -0700 Subject: [PATCH 8/9] Wire up hypsometric_integral accessor and public export Add .xrs.zonal_hypsometric_integral() accessor method, top-level import in __init__.py, accessor test, and vector-zones test. 
--- xrspatial/__init__.py | 1 + xrspatial/accessor.py | 4 ++ xrspatial/tests/test_hypsometric_integral.py | 46 ++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/xrspatial/__init__.py b/xrspatial/__init__.py index 8bd732a6..6b5dc2cc 100644 --- a/xrspatial/__init__.py +++ b/xrspatial/__init__.py @@ -131,6 +131,7 @@ from xrspatial.zonal import crosstab as zonal_crosstab # noqa from xrspatial.zonal import regions as regions # noqa from xrspatial.zonal import stats as zonal_stats # noqa +from xrspatial.zonal import hypsometric_integral # noqa from xrspatial.zonal import suggest_zonal_canvas as suggest_zonal_canvas # noqa from xrspatial.reproject import merge # noqa from xrspatial.reproject import reproject # noqa diff --git a/xrspatial/accessor.py b/xrspatial/accessor.py index 6ab4b090..cde22013 100644 --- a/xrspatial/accessor.py +++ b/xrspatial/accessor.py @@ -320,6 +320,10 @@ def zonal_crosstab(self, zones, **kwargs): from .zonal import crosstab return crosstab(zones, self._obj, **kwargs) + def zonal_hypsometric_integral(self, zones, **kwargs): + from .zonal import hypsometric_integral + return hypsometric_integral(zones, self._obj, **kwargs) + def crop(self, zones, zones_ids, **kwargs): from .zonal import crop return crop(zones, self._obj, zones_ids, **kwargs) diff --git a/xrspatial/tests/test_hypsometric_integral.py b/xrspatial/tests/test_hypsometric_integral.py index 461e929c..5f4f20db 100644 --- a/xrspatial/tests/test_hypsometric_integral.py +++ b/xrspatial/tests/test_hypsometric_integral.py @@ -194,3 +194,49 @@ def test_hypsometric_integral_nodata_none(backend): np.testing.assert_allclose(out[0, :], 0.5, rtol=1e-10) # zone 1: HI = (35-30)/(40-30) = 0.5 np.testing.assert_allclose(out[1, :], 0.5, rtol=1e-10) + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_hypsometric_integral_accessor(backend, hi_zones, hi_values): + """Verify the .xrs accessor method works.""" + result = 
Add a _to_numpy helper that handles the cupy, dask, and numpy backends
correctly, avoiding the implicit cupy-to-numpy conversion error raised
when calling `.values` on a cupy-backed DataArray.
--- xrspatial/tests/test_hypsometric_integral.py | 28 +++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/xrspatial/tests/test_hypsometric_integral.py b/xrspatial/tests/test_hypsometric_integral.py index 5f4f20db..0debcc33 100644 --- a/xrspatial/tests/test_hypsometric_integral.py +++ b/xrspatial/tests/test_hypsometric_integral.py @@ -10,6 +10,20 @@ from .general_checks import create_test_raster +try: + import cupy as cp +except ImportError: + cp = None + + +def _to_numpy(result): + """Extract numpy array from any backend result.""" + if da and isinstance(result.data, da.Array): + result = result.compute() + if cp is not None and isinstance(result.data, cp.ndarray): + return result.data.get() + return result.values + # --- fixtures --------------------------------------------------------------- @@ -64,7 +78,7 @@ def test_hypsometric_integral_basic(backend, hi_zones, hi_values): assert result.dims == hi_values.dims assert result.name == 'hypsometric_integral' - out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + out = _to_numpy(result) # zone 0 (nodata) cells should be NaN nodata_mask = np.array([ @@ -109,7 +123,7 @@ def test_hypsometric_integral_flat_zone(backend): chunks=(2, 2)) result = hypsometric_integral(zones, values, nodata=0) - out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + out = _to_numpy(result) assert np.all(np.isnan(out)) @@ -126,7 +140,7 @@ def test_hypsometric_integral_nan_in_values(backend): chunks=(2, 2)) result = hypsometric_integral(zones, values, nodata=0) - out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + out = _to_numpy(result) # zone 1 finite values: 10, 20, 30 -> HI = (20-10)/(30-10) = 0.5 # the NaN cell should remain NaN in output @@ -149,7 +163,7 @@ def test_hypsometric_integral_single_cell_zone(backend): chunks=(1, 2)) result = hypsometric_integral(zones, 
values, nodata=0) - out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + out = _to_numpy(result) # single cell -> range=0 -> NaN assert np.all(np.isnan(out)) @@ -167,7 +181,7 @@ def test_hypsometric_integral_all_nan_zone(backend): chunks=(2, 2)) result = hypsometric_integral(zones, values, nodata=0) - out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + out = _to_numpy(result) # zone 1: all NaN -> NaN assert np.all(np.isnan(out[0, :])) @@ -188,7 +202,7 @@ def test_hypsometric_integral_nodata_none(backend): chunks=(2, 2)) result = hypsometric_integral(zones, values, nodata=None) - out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + out = _to_numpy(result) # zone 0: HI = (15-10)/(20-10) = 0.5 np.testing.assert_allclose(out[0, :], 0.5, rtol=1e-10) @@ -203,7 +217,7 @@ def test_hypsometric_integral_accessor(backend, hi_zones, hi_values): assert isinstance(result, xr.DataArray) assert result.shape == hi_values.shape - out = result.values if not (da and isinstance(result.data, da.Array)) else result.compute().values + out = _to_numpy(result) z1_mask = np.array([ [False, True, True, True], [False, True, True, False],