Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions arkouda/numpy/pdarraycreation.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,9 @@ def array(
# If a is not already a numpy.ndarray, convert it
if not isinstance(a, np.ndarray):
try:
if dtype is not None and dtype != bigint:
from arkouda.numpy.dtypes import dtype as ak_dtype

if dtype is not None and ak_dtype(dtype) != "bigint":
# if the user specified dtype, use that dtype
a = np.array(a, dtype=dtype)
elif (
Expand Down Expand Up @@ -544,7 +546,7 @@ def _bigint_from_numpy(
pass
else:
if not isinstance(x, (int, float, np.integer, np.floating)):
raise TypeError("bigint requires numeric input, got non-numeric object")
raise TypeError(f"bigint requires numeric input, got {type(x)}")

# Handle floats: bigint-from-float behaves like existing code (cast to float64).
# Note: we only need to know *that* a float exists, not which values.
Expand Down
11 changes: 11 additions & 0 deletions arkouda/pandas/extension/_arkouda_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,17 @@ def __init__(
def _from_sequence(cls, scalars, dtype=None, copy=False):
from arkouda.numpy.pdarraycreation import array as ak_array

# normalize dtype input
if (
dtype is not None
and (
getattr(dtype, "name", None) in {"bigint", "ak.bigint"}
or str(dtype) in {"bigint", "ak.bigint"}
)
or dtype is ArkoudaBigintDtype
):
dtype = "bigint"

# If pandas passes our own EA dtype, ignore it and infer from data
if isinstance(dtype, _ArkoudaBaseDtype):
dtype = dtype.numpy_dtype
Expand Down
18 changes: 17 additions & 1 deletion arkouda/pandas/extension/_arkouda_extension_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,15 @@ def take(self, indexer, fill_value=None, allow_fill=False):
if fill_value is None:
fill_value = self.default_fill_value

# Pandas commonly passes fill_value=-1 for integer-like data when allow_fill=True.
# That cannot be represented in unsigned dtypes; fall back to a safe fill.
try:
if hasattr(self._data.dtype, "kind") and self._data.dtype.kind == "u" and fill_value < 0:
fill_value = 0
except TypeError:
# Non-numeric fill_value; let the cast below raise a sensible error
pass

# cast once to ensure dtype match
fv = self._data.dtype.type(fill_value)

Expand All @@ -454,7 +463,14 @@ def take(self, indexer, fill_value=None, allow_fill=False):
if oob.any():
raise IndexError("indexer out of bounds in take with allow_fill=True")

gathered = ak.where(mask, fv, self._data[idx_fix])
data = self._data[idx_fix]

if self._data.dtype == ak.bigint:
gathered = data.copy()
gathered[mask] = fill_value
else:
gathered = ak.where(mask, fv, data)

return type(self)(gathered)

def factorize(self, use_na_sentinel=True) -> Tuple[NDArray[np.intp], "ArkoudaExtensionArray"]:
Expand Down
95 changes: 95 additions & 0 deletions arkouda/pandas/extension/_series_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,101 @@ def is_arkouda(self) -> bool:
idx_arr = self._obj.index.values
return isinstance(arr, ArkoudaExtensionArray) and isinstance(idx_arr, ArkoudaExtensionArray)

# ------------------------------------------------------------------
# Legacy delegation: thin wrappers over ak.Series
# ------------------------------------------------------------------

def locate(self, key: object) -> pd.Series:
    """
    Lookup values by index label on the Arkouda server.

    This is a thin wrapper around the legacy :meth:`arkouda.pandas.series.Series.locate`
    method. It converts the pandas Series to a legacy Arkouda ``ak.Series``,
    performs the locate operation on the server, and wraps the result back into
    an Arkouda-backed pandas Series (ExtensionArray-backed) without NumPy
    materialization.

    Parameters
    ----------
    key : object
        Lookup key or keys. Interpreted in the same way as the legacy Arkouda
        ``Series.locate`` method. This may be:

        - a scalar
        - a list/tuple of scalars
        - an Arkouda ``pdarray``
        - an Arkouda ``Index`` / ``MultiIndex``
        - an Arkouda ``Series`` (special case: preserves key index)

        A pandas ``Index``/``MultiIndex`` or pandas ``Series`` is converted to
        its legacy Arkouda counterpart before the lookup.

    Returns
    -------
    pd.Series
        A pandas Series backed by Arkouda ExtensionArrays containing the located
        values. The returned Series remains distributed (no NumPy materialization)
        and is sorted by index.

    Notes
    -----
    * This method is Arkouda-specific; pandas does not define ``Series.locate``.
    * If ``key`` is a pandas Index/MultiIndex, consider converting it via
      ``key.ak.to_ak_legacy()`` before calling ``locate`` for the most direct path.

    Examples
    --------
    >>> import arkouda as ak
    >>> import pandas as pd
    >>> s = pd.Series([10, 20, 30], index=pd.Index([1, 2, 3])).ak.to_ak()
    >>> out = s.ak.locate([3, 1])
    >>> out.tolist()
    [np.int64(10), np.int64(30)]
    """
    # Lift the pandas Series into a legacy Arkouda Series
    aks = self.to_ak_legacy()

    # Normalize common pandas key types to legacy Arkouda where possible
    # (mirrors the "thin wrapper" approach used in the Index accessor).
    if isinstance(key, pd.Index):
        # pd.MultiIndex is a subclass of pd.Index, so this branch covers both.
        key = ArkoudaIndexAccessor(key).to_ak_legacy()
    elif isinstance(key, pd.Series):
        # For pandas Series keys, convert to legacy ak.Series to preserve key.index semantics.
        key = ArkoudaSeriesAccessor(key).to_ak_legacy()

    out_ak = aks.locate(key)

    # Wrap the legacy result index through the Index accessor helper (the same
    # path ``_from_return_msg`` uses) instead of round-tripping it through
    # ``to_pandas()``, so the index is not pulled to the client first.
    idx = ArkoudaIndexAccessor.from_ak_legacy(out_ak.index)

    # Wrap the values and attach the Arkouda-backed index
    # (keep the returned name from the legacy result).
    return _ak_array_to_pandas_series(out_ak.values, name=out_ak.name).set_axis(idx)

@staticmethod
def _from_return_msg(rep_msg: str) -> pd.Series:
    """
    Build an Arkouda-backed pandas Series from a legacy Arkouda return message.

    Counterpart of :meth:`ArkoudaIndexAccessor.from_return_msg`: the message is
    first parsed by the legacy ``ak.Series.from_return_msg`` constructor, and the
    resulting values and index are then re-wrapped for pandas.

    Parameters
    ----------
    rep_msg : str
        The ``+ delimited`` string message returned by Arkouda server operations
        that construct a Series.

    Returns
    -------
    pd.Series
        A pandas Series backed by Arkouda ExtensionArrays (no NumPy materialization).
    """
    legacy_series = ak_Series.from_return_msg(rep_msg)

    # Values: wrap into a Series first, then take its backing array.
    wrapped_values = _ak_array_to_pandas_series(
        legacy_series.values,
        name=legacy_series.name,
    )

    # Index: go through the Index accessor helper to obtain a pandas
    # Index/MultiIndex backed by EAs.
    wrapped_index = ArkoudaIndexAccessor.from_ak_legacy(legacy_series.index)

    return pd.Series(
        wrapped_values.array,
        index=wrapped_index,
        name=legacy_series.name,
    )

def groupby(self) -> GroupBy:
"""
Return an Arkouda GroupBy object for this Series, without materializing.
Expand Down
141 changes: 111 additions & 30 deletions arkouda/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from arkouda.categorical import Categorical
from arkouda.numpy import cast as akcast
from arkouda.numpy.alignment import lookup
from arkouda.numpy.pdarraycreation import arange, array, zeros
from arkouda.numpy.pdarraycreation import arange, zeros
from arkouda.numpy.segarray import SegArray
from arkouda.numpy.strings import Strings
else:
Expand Down Expand Up @@ -677,38 +677,119 @@ def locate(self, key: Union[int, pdarray, Index, Series, List, Tuple]) -> Series
A Series containing the values corresponding to the key.

"""
from arkouda.numpy.pdarraycreation import array

def is_scalar_label(x) -> bool:
    """Return True when ``x`` is a single label component, i.e. not array-like."""
    array_like_types = (pdarray, Index, Series, list, tuple)
    if isinstance(x, array_like_types):
        return False
    return True

def to_pdarray(obj) -> pdarray:
    """
    Normalize a key component without going through pandas/numpy containers.

    A ``pdarray`` is returned unchanged, an ``Index`` yields its ``.index``
    attribute, and a ``Series`` yields its ``.values`` when those are a
    ``pdarray``. Everything else (including a ``Series`` whose values are not
    a ``pdarray``) is handed to ``array``.
    """
    if isinstance(obj, pdarray):
        return obj
    if isinstance(obj, Index):
        # Arkouda Index wrapper: the underlying data is on .index
        return obj.index

    # Arkouda Series wrapper: the underlying data is on .values
    series_values = obj.values if isinstance(obj, Series) else None
    if isinstance(series_values, pdarray):
        return series_values

    # python scalar / python list/tuple -> arkouda pdarray (server-side)
    return array(obj)

def rebuild_mi_with_names(mi: MultiIndex, names) -> MultiIndex:
    """Return a new MultiIndex built from ``mi.levels`` carrying ``names``."""
    # Construct a fresh object rather than relying on the mi.names setter
    # being functional.
    levels = mi.levels
    level_names = list(names)
    return MultiIndex(levels, names=level_names)

def finalize(selector) -> Series:
    """Apply ``selector`` to this Series' index and values and wrap the result."""
    selected_index = self.index[selector]

    # When both the source and the selected index are MultiIndex, rebuild the
    # selected one so it carries the source index's names.
    both_multi = isinstance(selected_index, MultiIndex) and isinstance(self.index, MultiIndex)
    if both_multi:
        selected_index = rebuild_mi_with_names(selected_index, self.index.names)

    selected_values = self.values[selector]
    return Series(index=selected_index, data=selected_values)

# ---- Series key: preserve its index, lookup by its values (Arkouda Series)
if isinstance(key, Series):
# special case, keep the index values of the Series, and lookup the values
return Series(index=key.index, data=lookup(self.index.index, self.values, key.values))
elif isinstance(key, MultiIndex):
idx = self.index.lookup(key.index)
elif isinstance(key, Index):
idx = self.index.lookup(key.index)
elif isinstance(key, pdarray):
idx = self.index.lookup(key)
elif isinstance(key, (list, tuple)):

# ---- Direct index objects
if isinstance(key, MultiIndex):
return finalize(self.index.lookup(key.index))

if isinstance(key, Index):
return finalize(self.index.lookup(key.index))

# ---- pdarray key
if isinstance(key, pdarray):
return finalize(self.index.lookup(key))

# ---- list/tuple keys
if isinstance(key, (list, tuple)):
if isinstance(self.index, MultiIndex):
nlevels = self.index.nlevels

if len(key) != nlevels:
raise TypeError(
"For MultiIndex Series, 'key' must be a tuple label, an Index/MultiIndex, "
"or per-level keys with length equal to nlevels."
)

# Reject flat list-of-scalars like [0, 2]
if isinstance(key, list):
all_scalar = True
for k in key:
if not is_scalar_label(k):
all_scalar = False
break
if all_scalar:
raise TypeError(
"For MultiIndex Series, a single label must be a tuple, e.g. (0, 2), "
"not a flat list like [0, 2]."
)

# Single scalar label tuple: (0, 10) -> per-level length-1 arrays
if isinstance(key, tuple):
all_scalar = True
for k in key:
if not is_scalar_label(k):
all_scalar = False
break
if all_scalar:
per_level = [array([k]) for k in key]
return finalize(self.index.lookup(per_level))

# Per-level keys: normalize each element without pandas/numpy
per_level = [to_pdarray(k) for k in key]

# Enforce paired selection: equal sizes (metadata only)
sizes = [int(k.size) for k in per_level]
if len(set(sizes)) != 1:
raise ValueError(
f"Per-level MultiIndex keys must have the same length; got {sizes}."
)

return finalize(self.index.lookup(per_level))

# Non-MultiIndex:
# - list of scalars -> convert to pdarray
# - nested list/tuple -> transpose using pure Python (keys only)
key0 = key[0]
if isinstance(key0, list) or isinstance(key0, tuple):
# nested list. check if already arkouda arrays
if not isinstance(key0[0], pdarray):
# convert list of lists to list of pdarrays
key = [array(a) for a in np.array(key).T.copy()]

elif not isinstance(key0, pdarray):
# a list of scalars, convert into an arkouda array
try:
val = array(key)
if isinstance(val, pdarray):
key = val
except Exception:
raise TypeError("'key' parameter must be convertible to pdarray")

# else already a list of arkouda arrays, use as is
idx = self.index.lookup(key)
else:
# scalar value
idx = self.index == key
return Series(index=self.index[idx], data=self.values[idx])

if isinstance(key0, (list, tuple)):
cols = list(zip(*key))
per_level = [array(col) for col in cols] # col is a tuple of python scalars
return finalize(self.index.lookup(per_level))

if isinstance(key0, pdarray):
return finalize(self.index.lookup(key))

return finalize(self.index.lookup(to_pdarray(key)))

# ---- scalar key
return finalize(self.index == key)

@classmethod
def _make_binop(cls, operator):
Expand Down
Loading