|
9 | 9 | from typing import TYPE_CHECKING |
10 | 10 |
|
11 | 11 | import numpy as np |
| 12 | +import pandas as pd |
12 | 13 |
|
13 | 14 | from .aggregations import ( |
14 | | - AGGREGATIONS, |
| 15 | + SCANS, |
15 | 16 | AlignedArrays, |
16 | 17 | Scan, |
17 | 18 | ScanState, |
|
37 | 38 | from .types import DaskArray |
38 | 39 |
|
39 | 40 |
|
40 | | -def _validate_expected_groups_for_scan(nby, expected_groups): |
| 41 | +def _validate_expected_groups(nby, expected_groups): |
41 | 42 | """Validate expected_groups for scan operations.""" |
42 | 43 | if expected_groups is None: |
43 | 44 | return (None,) * nby |
44 | 45 | return expected_groups |
45 | 46 |
|
46 | 47 |
|
47 | | -def _convert_expected_groups_to_index_for_scan(expected_groups, isbin, sort): |
| 48 | +def _convert_expected_groups_to_index(expected_groups): |
48 | 49 | """Convert expected_groups to index for scan operations.""" |
49 | | - import pandas as pd |
50 | | - |
51 | 50 | result = [] |
52 | | - for expect, isbin_ in zip(expected_groups, isbin): |
| 51 | + for expect in expected_groups: |
53 | 52 | if expect is None: |
54 | 53 | result.append(None) |
55 | 54 | elif isinstance(expect, pd.Index): |
@@ -159,21 +158,18 @@ def groupby_scan( |
159 | 158 | if not is_duck_array(array): |
160 | 159 | array = np.asarray(array) |
161 | 160 |
|
162 | | - if isinstance(func, str): |
163 | | - agg = AGGREGATIONS[func] |
| 161 | + agg = SCANS[func] if isinstance(func, str) else func |
164 | 162 | assert isinstance(agg, Scan) |
165 | 163 | agg = copy.deepcopy(agg) |
166 | 164 |
|
167 | | - if (agg == AGGREGATIONS["ffill"] or agg == AGGREGATIONS["bfill"]) and array.dtype.kind != "f": |
| 165 | + if (agg == SCANS["ffill"] or agg == SCANS["bfill"]) and array.dtype.kind != "f": |
168 | 166 | # nothing to do, no NaNs! |
169 | 167 | return array |
170 | 168 |
|
171 | 169 | if expected_groups is not None: |
172 | | - raise NotImplementedError("Setting `expected_groups` and binning is not supported yet.") |
173 | | - expected_groups = _validate_expected_groups_for_scan(nby, expected_groups) |
174 | | - expected_groups = _convert_expected_groups_to_index_for_scan( |
175 | | - expected_groups, isbin=(False,) * nby, sort=False |
176 | | - ) |
| 170 | + raise NotImplementedError("Setting `expected_groups` with scans is not supported yet.") |
| 171 | + expected_groups = _validate_expected_groups(nby, expected_groups) |
| 172 | + expected_groups = _convert_expected_groups_to_index(expected_groups) |
177 | 173 |
|
178 | 174 | # Don't factorize early only when |
179 | 175 | # grouping by dask arrays, and not having expected_groups |
|
0 commit comments