Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 74 additions & 20 deletions scipy/stats/_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,20 @@
from scipy.stats._axis_nan_policy import _broadcast_arrays, _contains_nan


def _quantile_iv(x, p, method, axis, nan_policy, keepdims):
xp = array_namespace(x, p)
def _quantile_iv(x, p, method, axis, nan_policy, keepdims, weights):
xp = array_namespace(x, p, weights)

if not xp.isdtype(xp.asarray(x).dtype, ('integral', 'real floating')):
raise ValueError("`x` must have real dtype.")

if not xp.isdtype(xp.asarray(p).dtype, 'real floating'):
raise ValueError("`p` must have real floating dtype.")

x, p = xp_promote(x, p, force_floating=True, xp=xp)
p = xp.asarray(p, device=xp_device(x))
if not (weights is None
or xp.isdtype(xp.asarray(weights).dtype, ('integral', 'real floating'))):
raise ValueError("`weights` must have real dtype.")

x, p, weights = xp_promote(x, p, weights, force_floating=True, xp=xp)
dtype = x.dtype

axis_none = axis is None
Expand All @@ -50,6 +53,12 @@ def _quantile_iv(x, p, method, axis, nan_policy, keepdims):
message = f"`method` must be one of {methods}"
raise ValueError(message)

no_weights = {'_lower', '_midpoint', '_higher', '_nearest', 'harrell-davis',
'round_nearest', 'round_inward', 'round_outward'}
if weights is not None and method in no_weights:
message = f"`method='{method}'` does not support `weights`."
raise ValueError(message)

contains_nans = _contains_nan(x, nan_policy, xp_omit_okay=True, xp=xp)

if keepdims not in {None, True, False}:
Expand All @@ -64,8 +73,20 @@ def _quantile_iv(x, p, method, axis, nan_policy, keepdims):
shape[axis] = 1
x = xp.full(shape, xp.nan, dtype=dtype, device=xp_device(x))

y = xp.sort(x, axis=axis, stable=False)
y, p = _broadcast_arrays((y, p), axis=axis)
if weights is None:
y = xp.sort(x, axis=axis, stable=False)
y, p = _broadcast_arrays((y, p), axis=axis)
n_zero_weight = None
else:
x, weights = xp.broadcast_arrays(x, weights)
i_zero_weight = (weights == 0)
n_zero_weight = xp.count_nonzero(i_zero_weight, axis=axis, keepdims=True)
x = xpx.at(x)[i_zero_weight].set(xp.inf, copy=True)
i_y = xp.argsort(x, axis=axis, stable=False)
y = xp.take_along_axis(x, i_y, axis=axis)
weights = xp.take_along_axis(weights, i_y, axis=axis)
y, p, weights, i_y, n_zero_weight = _broadcast_arrays(
(y, p, weights, i_y, n_zero_weight), axis=axis)

if (keepdims is False) and (p.shape[axis] != 1):
message = "`keepdims` may be False only if the length of `p` along `axis` is 1."
Expand All @@ -74,9 +95,13 @@ def _quantile_iv(x, p, method, axis, nan_policy, keepdims):

y = xp.moveaxis(y, axis, -1)
p = xp.moveaxis(p, axis, -1)
weights = weights if weights is None else xp.moveaxis(weights, axis, -1)
n_zero_weight = (n_zero_weight if n_zero_weight is None
else xp.moveaxis(n_zero_weight, axis, -1))

n = _length_nonmasked(y, -1, xp=xp, keepdims=True)
n = xp.asarray(n, dtype=dtype, device=xp_device(y))
n = n if n_zero_weight is None else n - n_zero_weight

if contains_nans:
nans = xp.isnan(y)

Expand All @@ -85,8 +110,7 @@ def _quantile_iv(x, p, method, axis, nan_policy, keepdims):
if nan_policy == 'propagate':
nan_out = xp.any(nans, axis=-1)
else: # 'omit'
non_nan = xp.astype(~nans, xp.uint64)
n_int = xp.sum(non_nan, axis=-1, keepdims=True)
n_int = n - xp.count_nonzero(nans, axis=-1, keepdims=True)
n = xp.astype(n_int, dtype)
# NaNs are produced only if slice is empty after removing NaNs
nan_out = xp.any(n == 0, axis=-1)
Expand All @@ -99,17 +123,21 @@ def _quantile_iv(x, p, method, axis, nan_policy, keepdims):
y = xp.asarray(y, copy=True) # ensure writable
y = xpx.at(y, nans).set(0) # any non-nan will prevent NaN from propagating

n = xp.asarray(n, dtype=dtype, device=xp_device(y))

p_mask = (p > 1) | (p < 0) | xp.isnan(p)
if xp.any(p_mask):
p = xp.asarray(p, copy=True)
p = xpx.at(p, p_mask).set(0.5) # these get NaN-ed out at the end

return y, p, method, axis, nan_policy, keepdims, n, axis_none, ndim, p_mask, xp
return (y, p, method, axis, nan_policy, keepdims,
n, axis_none, ndim, p_mask, weights, xp)


@xp_capabilities(skip_backends=[("dask.array", "No take_along_axis yet.")],
jax_jit=False)
def quantile(x, p, *, method='linear', axis=0, nan_policy='propagate', keepdims=None):
def quantile(x, p, *, method='linear', axis=0, nan_policy='propagate', keepdims=None,
weights=None):
"""
Compute the p-th quantile of the data along the specified axis.

Expand Down Expand Up @@ -179,10 +207,17 @@ def quantile(x, p, *, method='linear', axis=0, nan_policy='propagate', keepdims=
axis to contain the number of quantiles given by ``p.size``. Therefore:

- By default, the axis will be reduced away if possible (i.e. if there is
exactly one element of `q` per axis-slice of `x`).
exactly one element of `p` per axis-slice of `x`).
- If `keepdims` is set to True, the axis will not be reduced away.
- If `keepdims` is set to False, the axis will be reduced away
if possible, and an error will be raised otherwise.
weights : array_like of finite, non-negative real numbers
Frequency weights; e.g., for counting number weights,
``quantile(x, p, weights=weights)`` is equivalent to
``quantile(np.repeat(x, weights), p)``. Values other than finite counting
numbers are accepted, but may not have valid statistical interpretations.
Not compatible with ``method='harrell-davis'`` or those that begin with
``'round_'``.

Returns
-------
Expand Down Expand Up @@ -245,6 +280,11 @@ def quantile(x, p, *, method='linear', axis=0, nan_policy='propagate', keepdims=
3. ``closest_observation``: ``m = -1/2`` and
``g = 1 - int((index == j) & (j%2 == 1))``

Note that for methods ``inverted_cdf`` and ``averaged_inverted_cdf``, only the
relative proportions of tied observations (and relative weights) affect the
results; for all other methods, the total number of observations (and absolute
weights) matter.
Copy link
Contributor Author

@mdhaber mdhaber Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking that if I were to include weights in an example, this is what I'd show.

I think I'll be asked to include the information about coverage conditions, etc., that appears in the NumPy documentation, so I'll address that now: I would prefer not to, but I can add references. I don't think the math underlying the weights needs to be spelled out because it's exactly the same math as repeated samples; it's only the implementation that is generalized. Now, that same generalized implementation happens to work for non-integer weights, too, so if there's a reference for valid statistical interpretations of non-integral weights, I'd be interested in including that.


A different strategy for computing quantiles from [2]_, ``method='harrell-davis'``,
uses a weighted combination of all elements. The weights are computed as:

Expand Down Expand Up @@ -326,18 +366,19 @@ def quantile(x, p, *, method='linear', axis=0, nan_policy='propagate', keepdims=
"""
# Input validation / standardization

temp = _quantile_iv(x, p, method, axis, nan_policy, keepdims)
y, p, method, axis, nan_policy, keepdims, n, axis_none, ndim, p_mask, xp = temp
temp = _quantile_iv(x, p, method, axis, nan_policy, keepdims, weights)
(y, p, method, axis, nan_policy, keepdims,
n, axis_none, ndim, p_mask, weights, xp) = temp

if method in {'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
'hazen', 'interpolated_inverted_cdf', 'linear',
'median_unbiased', 'normal_unbiased', 'weibull'}:
res = _quantile_hf(y, p, n, method, xp)
res = _quantile_hf(y, p, n, method, weights, xp)
elif method in {'harrell-davis'}:
res = _quantile_hd(y, p, n, xp)
elif method in {'_lower', '_midpoint', '_higher', '_nearest'}:
res = _quantile_bc(y, p, n, method, xp)
else: # method.startswith('winsor'):
else: # method.startswith('round'):
res = _quantile_winsor(y, p, n, method, xp)

res = xpx.at(res, p_mask).set(xp.nan)
Expand All @@ -356,13 +397,26 @@ def quantile(x, p, *, method='linear', axis=0, nan_policy='propagate', keepdims=
return res[()] if res.ndim == 0 else res


def _quantile_hf(y, p, n, method, xp):
def _quantile_hf(y, p, n, method, weights, xp):
ms = dict(inverted_cdf=0, averaged_inverted_cdf=0, closest_observation=-0.5,
interpolated_inverted_cdf=0, hazen=0.5, weibull=p, linear=1 - p,
median_unbiased=p/3 + 1/3, normal_unbiased=p/4 + 3/8)
m = ms[method]
jg = p*n + m - 1
j = jg // 1

if weights is None:
jg = p * n + m
jp1 = jg // 1
j = jp1 - 1
else:
cumulative_weights = xp.cumulative_sum(weights, axis=-1)
n_int = xp.asarray(n, dtype=xp.int64)
n_int = xp.broadcast_to(n_int, cumulative_weights.shape[:-1] + (1,))
total_weight = xp.take_along_axis(cumulative_weights, n_int-1, axis=-1)
Comment on lines +413 to +415
Copy link
Contributor Author

@mdhaber mdhaber Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For nan_policy='omit' and marray, n is an array the is the number of valid entries in a slice; that's why we can't just take the last element. (marray tests are skipped right now because I'm running into a mutability issue. I think it's an marray issue and the implementation will probably "just work" when that gets resolved because we're already accounting for n depending on the slice.)

jg = p * total_weight + m
jp1 = _xp_searchsorted(cumulative_weights, jg, side='right')
j = _xp_searchsorted(cumulative_weights, jg-1, side='right')
j, jp1 = xp.astype(j, y.dtype), xp.astype(jp1, y.dtype)

g = jg % 1
if method == 'inverted_cdf':
g = xp.astype((g > 0), jg.dtype)
Expand All @@ -376,7 +430,7 @@ def _quantile_hf(y, p, n, method, xp):

g = xpx.at(g)[j < 0].set(0)
j = xp.clip(j, 0., n - 1)
jp1 = xp.clip(j + 1, 0., n - 1)
jp1 = xp.clip(jp1, 0., n - 1)

return ((1 - g) * xp.take_along_axis(y, xp.astype(j, xp.int64), axis=-1)
+ g * xp.take_along_axis(y, xp.astype(jp1, xp.int64), axis=-1))
Expand Down
Loading
Loading