Skip to content

Commit bf2402b

Browse files
committed
Add xrdtypes.py!
1 parent 7b9bd17 commit bf2402b

File tree

1 file changed

+179
-0
lines changed

1 file changed

+179
-0
lines changed

dask_groupby/xrdtypes.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
import functools
2+
3+
import numpy as np
4+
5+
from . import xrutils as utils
6+
7+
# Use as a sentinel value to indicate a dtype appropriate NA value.
8+
# NOTE(review): ReprObject comes from the sibling xrutils module, which is not
# shown here; presumably its repr is the string passed in -- confirm.
NA = utils.ReprObject("<NA>")
9+
10+
11+
@functools.total_ordering
class AlwaysGreaterThan:
    """Sentinel for which ``self > other`` is True for every ``other``.

    All instances compare equal to one another (and unequal to anything
    else), and they share a single hash value so the sentinel can be stored
    in sets or used as a dict key. The remaining rich comparisons are
    derived by ``functools.total_ordering``.
    """

    def __gt__(self, other):
        # Unconditionally True, even when ``other`` is another instance.
        return True

    def __eq__(self, other):
        return isinstance(other, type(self))

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None. Restore hashability
        # with a value consistent with __eq__: equal instances, equal hash.
        return hash(type(self))
18+
19+
20+
@functools.total_ordering
class AlwaysLessThan:
    """Sentinel for which ``self < other`` is True for every ``other``.

    All instances compare equal to one another (and unequal to anything
    else), and they share a single hash value so the sentinel can be stored
    in sets or used as a dict key. The remaining rich comparisons are
    derived by ``functools.total_ordering``.
    """

    def __lt__(self, other):
        # Unconditionally True, even when ``other`` is another instance.
        return True

    def __eq__(self, other):
        return isinstance(other, type(self))

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None. Restore hashability
        # with a value consistent with __eq__: equal instances, equal hash.
        return hash(type(self))
27+
28+
29+
# Equivalence to np.inf (-np.inf) for object-type
30+
# Instance of AlwaysGreaterThan: ``INF > x`` evaluates to True for any ``x``.
INF = AlwaysGreaterThan()
31+
# Instance of AlwaysLessThan: ``NINF < x`` evaluates to True for any ``x``.
NINF = AlwaysLessThan()
32+
33+
34+
# Pairs of types that, if both found, should be promoted to object dtype
35+
# instead of following NumPy's own type-promotion rules. These type promotion
36+
# rules match pandas instead. For reference, see the NumPy type hierarchy:
37+
# https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
38+
# Each entry is a pair of NumPy scalar types; result_type() returns object
# dtype when both members of a pair are matched by its inputs.
PROMOTE_TO_OBJECT = [
    {np.number, np.character},  # numpy promotes to character
    {np.bool_, np.character},  # numpy promotes to character
    # np.str_ is the unicode scalar type; the np.unicode_ alias for it was
    # removed in NumPy 2.0, so referring to it fails at import time there.
    {np.bytes_, np.str_},  # numpy promotes to unicode
]
43+
44+
45+
def maybe_promote(dtype):
    """Simpler equivalent of pandas.core.common._maybe_promote

    Parameters
    ----------
    dtype : np.dtype or dtype-like
        Anything accepted by ``np.dtype`` (for example ``np.int16`` or
        ``"int16"``); it is converted to an ``np.dtype`` before use.

    Returns
    -------
    dtype : Promoted dtype that can hold missing values.
    fill_value : Valid missing value for the promoted dtype.
    """
    # Normalise first: the integer branch reads ``dtype.itemsize``, which is
    # only a real integer on an np.dtype instance (not on a scalar type or a
    # string, both of which np.issubdtype would otherwise accept).
    dtype = np.dtype(dtype)

    # N.B. these casting rules should match pandas
    if np.issubdtype(dtype, np.floating):
        fill_value = np.nan
    elif np.issubdtype(dtype, np.timedelta64):
        # See https://github.com/numpy/numpy/issues/10685
        # np.timedelta64 is a subclass of np.integer
        # Check np.timedelta64 before np.integer
        fill_value = np.timedelta64("NaT")
    elif np.issubdtype(dtype, np.integer):
        # Integers cannot hold NaN: 1- and 2-byte integers are promoted to
        # float32, wider ones to float64.
        dtype = np.float32 if dtype.itemsize <= 2 else np.float64
        fill_value = np.nan
    elif np.issubdtype(dtype, np.complexfloating):
        fill_value = np.nan + np.nan * 1j
    elif np.issubdtype(dtype, np.datetime64):
        fill_value = np.datetime64("NaT")
    else:
        # Everything else (bool, strings, object, ...) falls back to object.
        dtype = object
        fill_value = np.nan
    return np.dtype(dtype), fill_value
76+
77+
78+
# The dtypes carried by NumPy's datetime64 and timedelta64 "NaT" scalars.
NAT_TYPES = {nat.dtype for nat in (np.datetime64("NaT"), np.timedelta64("NaT"))}
79+
80+
81+
def get_fill_value(dtype):
    """Look up the missing-value marker that goes with ``dtype``.

    Parameters
    ----------
    dtype : np.dtype

    Returns
    -------
    fill_value : The second element of ``maybe_promote(dtype)``, i.e. the
        missing value for the promoted dtype.
    """
    # maybe_promote returns (promoted_dtype, fill_value); only the latter
    # is of interest here.
    return maybe_promote(dtype)[1]
94+
95+
96+
def get_pos_infinity(dtype, max_for_int=False):
    """Return an appropriate positive infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    max_for_int : bool
        Return np.iinfo(dtype).max instead of np.inf

    Returns
    -------
    fill_value : positive infinity value corresponding to this dtype.
        ``np.inf`` for floating dtypes; ``np.inf`` or ``np.iinfo(dtype).max``
        for integer dtypes; a complex number with infinite real and imaginary
        parts for complex dtypes; the ``INF`` sentinel otherwise.
    """
    if issubclass(dtype.type, np.floating):
        return np.inf

    if issubclass(dtype.type, np.integer):
        if max_for_int:
            return np.iinfo(dtype).max
        else:
            return np.inf

    if issubclass(dtype.type, np.complexfloating):
        # Build the value directly: ``np.inf + 1j * np.inf`` evaluates to
        # ``nan+infj`` because the product contributes 0 * inf = nan to the
        # real part.
        return complex(np.inf, np.inf)

    return INF
122+
123+
124+
def get_neg_infinity(dtype, min_for_int=False):
    """Return an appropriate negative infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    min_for_int : bool
        Return np.iinfo(dtype).min instead of -np.inf

    Returns
    -------
    fill_value : negative infinity value corresponding to this dtype.
        ``-np.inf`` for floating dtypes; ``-np.inf`` or
        ``np.iinfo(dtype).min`` for integer dtypes; a complex number with
        negative-infinite real and imaginary parts for complex dtypes; the
        ``NINF`` sentinel otherwise.
    """
    if issubclass(dtype.type, np.floating):
        return -np.inf

    if issubclass(dtype.type, np.integer):
        if min_for_int:
            return np.iinfo(dtype).min
        else:
            return -np.inf

    if issubclass(dtype.type, np.complexfloating):
        # Build the value directly: ``-np.inf - 1j * np.inf`` evaluates to
        # ``nan-infj`` because the product contributes 0 * inf = nan to the
        # real part.
        return complex(-np.inf, -np.inf)

    return NINF
150+
151+
152+
def is_datetime_like(dtype):
    """Report whether ``dtype`` is a NumPy datetime64 or timedelta64 dtype."""
    if np.issubdtype(dtype, np.datetime64):
        return True
    return np.issubdtype(dtype, np.timedelta64)
155+
156+
157+
def result_type(*arrays_and_dtypes):
    """Variant of np.result_type that follows pandas' promotion rules.

    Where this differs from NumPy:
    number + string -> object (not string)
    bytes + unicode -> object (not unicode)

    Parameters
    ----------
    *arrays_and_dtypes : list of arrays and dtypes
        The dtype is extracted from both numpy and dask arrays.

    Returns
    -------
    numpy.dtype for the result.
    """
    # Collect the distinct scalar type of every argument.
    scalar_types = set()
    for item in arrays_and_dtypes:
        scalar_types.add(np.result_type(item).type)

    # If both members of any listed pair are represented among the inputs,
    # short-circuit to object dtype.
    for pair in PROMOTE_TO_OBJECT:
        left, right = pair
        has_left = any(issubclass(t, left) for t in scalar_types)
        if has_left and any(issubclass(t, right) for t in scalar_types):
            return np.dtype(object)

    # Otherwise defer to NumPy's own promotion.
    return np.result_type(*arrays_and_dtypes)

0 commit comments

Comments
 (0)