@@ -1749,15 +1749,15 @@ def test_validate_reindex() -> None:
 
 
 @requires_dask
-def test_1d_blockwise_sort_optimization():
+def test_1d_blockwise_sort_optimization() -> None:
     # Make sure for resampling problems sorting isn't done.
     time = pd.Series(pd.date_range("2020-09-01", "2020-12-31 23:59", freq="3h"))
     array = dask.array.ones((len(time),), chunks=(224,))
 
-    actual, _ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
+    actual, *_ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
     assert all("getitem" not in k for k in actual.dask)
 
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         array,
         time.dt.dayofyear.values[::-1],
         sort=True,
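
The recurring change from "actual, _ = ..." to "actual, *_ = ..." throughout this diff reflects that groupby_reduce returns the reduced array followed by one group-labels array per grouping variable, so the tuple length is not fixed. A minimal sketch of why star-unpacking is the shape-agnostic (and type-checker-friendly) way to keep only the result; fake_groupby_reduce is a hypothetical stand-in, not flox's implementation:

    import numpy as np

    def fake_groupby_reduce(array, *by):
        # Mirrors flox's (result, *groups) return convention: one array
        # of unique labels per grouping variable.
        groups = tuple(np.unique(b) for b in by)
        result = np.zeros(tuple(len(g) for g in groups))  # placeholder reduction
        return (result, *groups)

    # With one grouper both unpackings work; with two, only *_ does:
    result, *_ = fake_groupby_reduce(np.ones(4), np.array([0, 0, 1, 1]))
    result, *_ = fake_groupby_reduce(
        np.ones(4), np.array([0, 0, 1, 1]), np.array([1, 1, 2, 2])
    )
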
@@ -1766,7 +1766,7 @@ def test_1d_blockwise_sort_optimization():
     )
     assert any("getitem" in k for k in actual.dask.layers)
 
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         array,
         time.dt.dayofyear.values[::-1],
         sort=False,
@@ -1777,7 +1777,7 @@ def test_1d_blockwise_sort_optimization():
 
 
 @requires_dask
-def test_negative_index_factorize_race_condition():
+def test_negative_index_factorize_race_condition() -> None:
     # shape = (10, 2000)
     # chunks = ((shape[0]-1,1), 10)
     shape = (101, 174000)
@@ -1804,17 +1804,17 @@ def test_negative_index_factorize_race_condition():
 
 
 @pytest.mark.parametrize("sort", [True, False])
-def test_expected_index_conversion_passthrough_range_index(sort):
+def test_expected_index_conversion_passthrough_range_index(sort) -> None:
     index = pd.RangeIndex(100)
-    actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,))
+    actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,))  # type: ignore[call-overload]
     assert actual[0] is index
 
 
-def test_method_check_numpy():
+def test_method_check_numpy() -> None:
     bins = [-2, -1, 0, 1, 2]
     field = np.ones((5, 3))
     by = np.array([[-1.5, -1.5, 0.5, 1.5, 1.5] * 3]).reshape(5, 3)
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         field,
         by,
         expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1825,7 +1825,7 @@ def test_method_check_numpy():
     expected = np.array([6, np.nan, 3, 6])
     assert_equal(actual, expected)
 
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         field,
         by,
         expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1845,7 +1845,7 @@ def test_method_check_numpy():
 
 
 @pytest.mark.parametrize("dtype", [None, np.float64])
-def test_choose_engine(dtype):
+def test_choose_engine(dtype) -> None:
     numbagg_possible = HAS_NUMBAGG and dtype is None
     default = "numbagg" if numbagg_possible else "numpy"
     mean = _initialize_aggregation(
@@ -1887,10 +1887,10 @@ def test_choose_engine(dtype):
     assert _choose_engine(np.array([1, 1, 2, 2]), agg=argmax) == "numpy"
 
 
-def test_xarray_fill_value_behaviour():
+def test_xarray_fill_value_behaviour() -> None:
     bar = np.array([1, 2, 3, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan])
     times = np.arange(0, 20, 2)
-    actual, _ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
+    actual, *_ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
     nan = np.nan
     # fmt: off
     expected = np.array(
@@ -1905,7 +1905,7 @@ def test_xarray_fill_value_behaviour():
 @pytest.mark.parametrize("func", ["nanquantile", "quantile"])
 @pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False])
 @pytest.mark.parametrize("by_ndim", [1, 2])
-def test_multiple_quantiles(q, chunk, func, by_ndim):
+def test_multiple_quantiles(q, chunk, func, by_ndim) -> None:
     array = np.array([[1, -1, np.nan, 3, 4, 10, 5], [1, np.nan, np.nan, 3, 4, np.nan, np.nan]])
     labels = np.array([0, 0, 0, 1, 0, 1, 1])
     if by_ndim == 2:
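
For the quantile test continued below: when q is a vector, np.quantile and np.nanquantile prepend a quantile axis to the result, which is the shape the assertions depend on. A quick standalone check:

    import numpy as np

    a = np.arange(12.0).reshape(3, 4)
    out = np.quantile(a, q=[0.25, 0.5, 0.75], axis=-1, keepdims=True)
    # The quantile axis comes first: (len(q),) + the keepdims-reduced shape.
    assert out.shape == (3, 3, 1)
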
@@ -1916,38 +1916,37 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
     if chunk:
         array = dask.array.from_array(array, chunks=(1,) + (-1,) * by_ndim)
 
-    actual, _ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
+    actual, *_ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
     sorted_array = array[..., [0, 1, 2, 4, 3, 5, 6]]
     f = partial(getattr(np, func), q=q, axis=axis, keepdims=True)
     if chunk:
-        sorted_array = sorted_array.compute()
+        sorted_array = sorted_array.compute()  # type: ignore[attr-defined]
     expected = np.concatenate((f(sorted_array[..., :4]), f(sorted_array[..., 4:])), axis=-1)
     if by_ndim == 2:
         expected = expected.squeeze(axis=-2)
     assert_equal(expected, actual, tolerance={"atol": 1e-14})
 
 
 @pytest.mark.parametrize("dtype", ["U3", "S3"])
-def test_nanlen_string(dtype, engine):
+def test_nanlen_string(dtype, engine) -> None:
     array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype)
     by = np.array([0, 0, 1, 2, 1, 0])
     expected = np.array([3, 2, 1], dtype=np.intp)
     actual, *_ = groupby_reduce(array, by, func="count", engine=engine)
     assert_equal(expected, actual)
 
 
-def test_cumusm():
+def test_cumusm() -> None:
     array = np.array([1, 1, 1], dtype=np.uint64)
     by = np.array([0] * array.shape[-1])
-    kwargs = {"func": "nancumsum", "axis": -1}
     expected = np.nancumsum(array, axis=-1)
 
-    actual = groupby_scan(array, by, **kwargs)
+    actual = groupby_scan(array, by, func="nancumsum", axis=-1)
     assert_equal(expected, actual)
 
     if has_dask:
         da = dask.array.from_array(array, chunks=2)
-        actual = groupby_scan(da, by, **kwargs)
+        actual = groupby_scan(da, by, func="nancumsum", axis=-1)
         assert_equal(expected, actual)
 
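
Inlining func= and axis= in test_cumusm, rather than splatting a kwargs dict, is presumably a typing fix: a heterogeneous dict literal is inferred as dict[str, object], and **-unpacking it cannot satisfy precisely typed parameters. A sketch of that mypy behavior, with a hypothetical scan function:

    def scan(func: str, axis: int) -> None:
        ...

    kwargs = {"func": "nancumsum", "axis": -1}  # inferred as dict[str, object]
    scan(**kwargs)  # runs fine, but mypy rejects the object-typed keywords
    scan(func="nancumsum", axis=-1)  # explicit keywords type-check cleanly
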
@@ -1962,7 +1961,7 @@ def test_cumusm():
 @pytest.mark.parametrize("size", ((1, 12), (12,), (12, 9)))
 @pytest.mark.parametrize("add_nan_by", [True, False])
 @pytest.mark.parametrize("func", ["ffill", "bfill"])
-def test_ffill_bfill(chunks, size, add_nan_by, func):
+def test_ffill_bfill(chunks, size, add_nan_by, func) -> None:
     array, by = gen_array_by(size, func)
     if chunks:
         array = dask.array.from_array(array, chunks=chunks)
@@ -1976,11 +1975,11 @@ def test_ffill_bfill(chunks, size, add_nan_by, func):
 
 
 @requires_dask
-def test_blockwise_nans():
+def test_blockwise_nans() -> None:
     array = dask.array.ones((1, 10), chunks=2)
     by = np.array([-1, 0, -1, 1, -1, 2, -1, 3, 4, 4])
-    actual, actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
-    expected, expected_groups = flox.groupby_reduce(
+    actual, *actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
+    expected, *expected_groups = flox.groupby_reduce(
         array.compute(), by, func="sum", expected_groups=pd.RangeIndex(0, 5)
     )
     assert_equal(expected_groups, actual_groups)
@@ -1989,50 +1988,68 @@ def test_blockwise_nans():
 
 @pytest.mark.parametrize("func", ["sum", "prod", "count", "nansum"])
 @pytest.mark.parametrize("engine", ["flox", "numpy"])
-def test_agg_dtypes(func, engine):
+def test_agg_dtypes(func, engine) -> None:
     # regression test for GH388
     counts = np.array([0, 2, 1, 0, 1])
     group = np.array([1, 1, 1, 2, 2])
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         counts, group, expected_groups=(np.array([1, 2]),), func=func, dtype="uint8", engine=engine
     )
     expected = _get_array_func(func)(counts, dtype="uint8")
     assert actual.dtype == np.uint8 == expected.dtype
 
 
 @requires_dask
-def test_blockwise_avoid_rechunk():
+def test_blockwise_avoid_rechunk() -> None:
     array = dask.array.zeros((6,), chunks=(2, 4), dtype=np.int64)
     by = np.array(["1", "1", "0", "", "0", ""], dtype="<U1")
-    actual, groups = groupby_reduce(array, by, func="first")
-    assert_equal(groups, ["", "0", "1"])
+    actual, *groups = groupby_reduce(array, by, func="first")
+    assert_equal(groups, [["", "0", "1"]])
     assert_equal(actual, np.array([0, 0, 0], dtype=np.int64))
 
 
-def test_datetime_minmax(engine):
+def test_datetime_minmax(engine) -> None:
     # GH403
     array = np.array([np.datetime64("2000-01-01"), np.datetime64("2000-01-02"), np.datetime64("2000-01-03")])
     by = np.array([0, 0, 1])
-    actual, _ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
+    actual, *_ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
     expected = array[[0, 2]]
     assert_equal(expected, actual)
 
     expected = array[[1, 2]]
-    actual, _ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
+    actual, *_ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
     assert_equal(expected, actual)
 
 
 @pytest.mark.parametrize("func", ["first", "last", "nanfirst", "nanlast"])
-def test_datetime_timedelta_first_last(engine, func):
+def test_datetime_timedelta_first_last(engine, func) -> None:
     import flox
 
     idx = 0 if "first" in func else -1
+    idx1 = 2 if "first" in func else -1
 
+    ## datetime
     dt = pd.date_range("2001-01-01", freq="d", periods=5).values
     by = np.ones(dt.shape, dtype=int)
-    actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+    actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
     assert_equal(actual, dt[[idx]])
 
+    # missing group
+    by = np.array([0, 2, 3, 3, 3])
+    actual, *_ = flox.groupby_reduce(
+        dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+    )
+    assert_equal(actual, [dt[0], np.datetime64("NaT"), dt[1], dt[idx1]])
+
+    ## timedelta
     dt = dt - dt[0]
-    actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+    by = np.ones(dt.shape, dtype=int)
+    actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
     assert_equal(actual, dt[[idx]])
+
+    # missing group
+    by = np.array([0, 2, 3, 3, 3])
+    actual, *_ = flox.groupby_reduce(
+        dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+    )
+    assert_equal(actual, [dt[0], np.timedelta64("NaT"), dt[1], dt[idx1]])
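
The new missing-group assertions rely on NumPy's per-family not-a-time sentinels: datetime64 arrays are filled with datetime64("NaT") and timedelta64 arrays with timedelta64("NaT"). A standalone check of the behavior those expected values encode:

    import numpy as np

    dt = np.arange("2001-01-01", "2001-01-06", dtype="datetime64[D]")
    td = dt - dt[0]  # timedelta64 counterpart

    # Each dtype family has its own NaT; both register as missing.
    assert np.isnat(np.datetime64("NaT")) and np.isnat(np.timedelta64("NaT"))

    # Filling with NaT preserves the array's dtype.
    filled = np.where([True, False, True, True, True], dt, np.datetime64("NaT"))
    assert filled.dtype == dt.dtype and np.isnat(filled[1])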