Skip to content

Commit af42544

Browse files
authored
Zonal_crosstab 3D: Ensure the content of the input parameter `values` is preserved (#754)
* _sort_and_stride 3D case: deepcopy `values` to ensure its content does not change
* zonal stats, zonal crosstab: add tests to ensure input data is unmodified
1 parent 71a1282 commit af42544

File tree

3 files changed

+79
-3
lines changed

3 files changed

+79
-3
lines changed

xrspatial/tests/general_checks.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ def general_output_checks(input_agg: xr.DataArray,
7272
assert output_data.dtype == expected_results.dtype
7373

7474

75+
def assert_input_data_unmodified(data_before, data_after):
76+
assert data_before.equals(data_after)
77+
78+
7579
def assert_nan_edges_effect(result_agg):
7680
# nan edge effect
7781
edges = [

xrspatial/tests/test_zonal.py

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import copy
2+
13
import dask.array as da
24
import dask.dataframe as dd
35
import numpy as np
@@ -11,7 +13,9 @@
1113
from xrspatial import zonal_stats as stats
1214
from xrspatial.zonal import regions
1315

14-
from .general_checks import create_test_raster, general_output_checks, has_cuda_and_cupy
16+
from .general_checks import (
17+
assert_input_data_unmodified, create_test_raster, general_output_checks, has_cuda_and_cupy
18+
)
1519

1620

1721
@pytest.fixture
@@ -299,14 +303,26 @@ def check_results(backend, df_result, expected_results_dict):
299303
def test_default_stats(backend, data_zones, data_values_2d, result_default_stats):
300304
if backend == 'cupy' and not has_cuda_and_cupy():
301305
pytest.skip("Requires CUDA and CuPy")
306+
307+
# copy input data to verify they're unchanged after running the function
308+
copied_data_zones = copy.deepcopy(data_zones)
309+
copied_data_values_2d = copy.deepcopy(data_values_2d)
310+
302311
df_result = stats(zones=data_zones, values=data_values_2d)
303312
check_results(backend, df_result, result_default_stats)
304313

314+
assert_input_data_unmodified(data_zones, copied_data_zones)
315+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
316+
305317

306318
@pytest.mark.parametrize("backend", ['numpy'])
307319
def test_default_stats_dataarray(
308320
backend, data_zones, data_values_2d, result_default_stats_dataarray
309321
):
322+
# copy input data to verify they're unchanged after running the function
323+
copied_data_zones = copy.deepcopy(data_zones)
324+
copied_data_values_2d = copy.deepcopy(data_values_2d)
325+
310326
dataarray_result = stats(
311327
zones=data_zones, values=data_values_2d, return_type='xarray.DataArray'
312328
)
@@ -317,29 +333,43 @@ def test_default_stats_dataarray(
317333
verify_dtype=False,
318334
verify_attrs=False,
319335
)
320-
336+
assert_input_data_unmodified(data_zones, copied_data_zones)
337+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
321338

322339
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
323340
def test_zone_ids_stats(backend, data_zones, data_values_2d, result_zone_ids_stats):
324341
if backend == 'cupy' and not has_cuda_and_cupy():
325342
pytest.skip("Requires CUDA and CuPy")
343+
344+
# copy input data to verify they're unchanged after running the function
345+
copied_data_zones = copy.deepcopy(data_zones)
346+
copied_data_values_2d = copy.deepcopy(data_values_2d)
347+
326348
zone_ids, expected_result = result_zone_ids_stats
327349
df_result = stats(zones=data_zones, values=data_values_2d,
328350
zone_ids=zone_ids)
329351
check_results(backend, df_result, expected_result)
352+
assert_input_data_unmodified(data_zones, copied_data_zones)
353+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
330354

331355

332356
@pytest.mark.parametrize("backend", ['numpy'])
333357
def test_zone_ids_stats_dataarray(
334358
backend, data_zones, data_values_2d, result_zone_ids_stats_dataarray
335359
):
360+
# copy input data to verify they're unchanged after running the function
361+
copied_data_zones = copy.deepcopy(data_zones)
362+
copied_data_values_2d = copy.deepcopy(data_values_2d)
363+
336364
zone_ids, expected_result = result_zone_ids_stats_dataarray
337365
dataarray_result = stats(
338366
zones=data_zones, values=data_values_2d, zone_ids=zone_ids, return_type='xarray.DataArray'
339367
)
340368
general_output_checks(
341369
data_values_2d, dataarray_result, expected_result, verify_dtype=False, verify_attrs=False
342370
)
371+
assert_input_data_unmodified(data_zones, copied_data_zones)
372+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
343373

344374

345375
@pytest.mark.parametrize("backend", ['numpy', 'cupy'])
@@ -348,6 +378,10 @@ def test_custom_stats(backend, data_zones, data_values_2d, result_custom_stats):
348378
if backend == 'cupy' and not has_cuda_and_cupy():
349379
pytest.skip("Requires CUDA and CuPy")
350380

381+
# copy input data to verify they're unchanged after running the function
382+
copied_data_zones = copy.deepcopy(data_zones)
383+
copied_data_values_2d = copy.deepcopy(data_values_2d)
384+
351385
custom_stats = {
352386
'double_sum': _double_sum,
353387
'range': _range,
@@ -359,10 +393,15 @@ def test_custom_stats(backend, data_zones, data_values_2d, result_custom_stats):
359393
zone_ids=zone_ids, nodata_values=nodata_values
360394
)
361395
check_results(backend, df_result, expected_result)
396+
assert_input_data_unmodified(data_zones, copied_data_zones)
397+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
362398

363399

364400
@pytest.mark.parametrize("backend", ['numpy'])
365401
def test_custom_stats_dataarray(backend, data_zones, data_values_2d, result_custom_stats_dataarray):
402+
# copy input data to verify they're unchanged after running the function
403+
copied_data_zones = copy.deepcopy(data_zones)
404+
copied_data_values_2d = copy.deepcopy(data_values_2d)
366405
# ---- custom stats returns a xr.DataArray (NumPy only) ----
367406
custom_stats = {
368407
'double_sum': _double_sum,
@@ -376,43 +415,69 @@ def test_custom_stats_dataarray(backend, data_zones, data_values_2d, result_cust
376415
general_output_checks(
377416
data_values_2d, dataarray_result, expected_result, verify_dtype=False, verify_attrs=False
378417
)
418+
assert_input_data_unmodified(data_zones, copied_data_zones)
419+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
379420

380421

381422
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
382423
def test_count_crosstab_2d(backend, data_zones, data_values_2d, result_count_crosstab_2d):
424+
# copy input data to verify they're unchanged after running the function
425+
copied_data_zones = copy.deepcopy(data_zones)
426+
copied_data_values_2d = copy.deepcopy(data_values_2d)
427+
383428
zone_ids, cat_ids, expected_result = result_count_crosstab_2d
384429
df_result = crosstab(
385430
zones=data_zones, values=data_values_2d, zone_ids=zone_ids, cat_ids=cat_ids,
386431
)
387432
check_results(backend, df_result, expected_result)
433+
assert_input_data_unmodified(data_zones, copied_data_zones)
434+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
388435

389436

390437
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
391438
def test_percentage_crosstab_2d(backend, data_zones, data_values_2d, result_percentage_crosstab_2d):
439+
# copy input data to verify they're unchanged after running the function
440+
copied_data_zones = copy.deepcopy(data_zones)
441+
copied_data_values_2d = copy.deepcopy(data_values_2d)
442+
392443
nodata_values, zone_ids, cat_ids, expected_result = result_percentage_crosstab_2d
393444
df_result = crosstab(
394445
zones=data_zones, values=data_values_2d, zone_ids=zone_ids, cat_ids=cat_ids,
395446
nodata_values=nodata_values, agg='percentage'
396447
)
397448
check_results(backend, df_result, expected_result)
449+
assert_input_data_unmodified(data_zones, copied_data_zones)
450+
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
398451

399452

400453
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
401454
def test_crosstab_3d_count(backend, data_zones, data_values_3d, result_crosstab_3d):
455+
# copy input data to verify they're unchanged after running the function
456+
copied_data_zones = copy.deepcopy(data_zones)
457+
copied_data_values_3d = copy.deepcopy(data_values_3d)
458+
402459
layer, zone_ids, expected_result = result_crosstab_3d
403460
df_result = crosstab(zones=data_zones, values=data_values_3d,
404461
zone_ids=zone_ids, layer=layer, agg='count')
405462
check_results(backend, df_result, expected_result['count'])
463+
assert_input_data_unmodified(data_zones, copied_data_zones)
464+
assert_input_data_unmodified(data_values_3d, copied_data_values_3d)
406465

407466

408467
@pytest.mark.parametrize("backend", ['numpy'])
409468
def test_crosstab_3d_agg_method(backend, data_zones, data_values_3d, result_crosstab_3d):
469+
# copy input data to verify they're unchanged after running the function
470+
copied_data_zones = copy.deepcopy(data_zones)
471+
copied_data_values_3d = copy.deepcopy(data_values_3d)
472+
410473
layer, zone_ids, expected_result = result_crosstab_3d
411474
agg_methods = ['min', 'max', 'mean', 'sum', 'std', 'var', 'count']
412475
for agg in agg_methods:
413476
df_result = crosstab(zones=data_zones, values=data_values_3d,
414477
zone_ids=zone_ids, layer=layer, agg=agg)
415478
check_results(backend, df_result, expected_result[agg])
479+
assert_input_data_unmodified(data_zones, copied_data_zones)
480+
assert_input_data_unmodified(data_values_3d, copied_data_values_3d)
416481

417482

418483
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
@@ -422,12 +487,18 @@ def test_nodata_values_crosstab_3d(
422487
data_values_3d,
423488
result_nodata_values_crosstab_3d
424489
):
490+
# copy input data to verify they're unchanged after running the function
491+
copied_data_zones = copy.deepcopy(data_zones)
492+
copied_data_values_3d = copy.deepcopy(data_values_3d)
493+
425494
nodata_values, layer, zone_ids, expected_result = result_nodata_values_crosstab_3d
426495
df_result = crosstab(
427496
zones=data_zones, values=data_values_3d, zone_ids=zone_ids,
428497
layer=layer, nodata_values=nodata_values
429498
)
430499
check_results(backend, df_result, expected_result)
500+
assert_input_data_unmodified(data_zones, copied_data_zones)
501+
assert_input_data_unmodified(data_values_3d, copied_data_values_3d)
431502

432503

433504
def test_apply():

xrspatial/zonal.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# standard library
2+
import copy
23
from math import sqrt
34
from typing import Callable, Dict, List, Optional, Union
45

@@ -92,7 +93,7 @@ def _sort_and_stride(zones, values, unique_zones):
9293

9394
values_shape = values.shape
9495
if len(values_shape) == 3:
95-
values_by_zones = values.reshape(
96+
values_by_zones = copy.deepcopy(values).reshape(
9697
values_shape[0], values_shape[1] * values_shape[2])
9798
for i in range(values_shape[0]):
9899
values_by_zones[i] = values_by_zones[i][sorted_indices]

0 commit comments

Comments
 (0)