Add weighted quantile and percentile support with tests

Aniketsy · Aniketsy · commit 4f522e6bdcb9 · 2025-10-25T11:00:20.000+05:30
diff --git a/jax/_src/numpy/reductions.py b/jax/_src/numpy/reductions.py
@@ -2379,7 +2379,6 @@ def quantile(a: ArrayLike, q: ArrayLike, axis: int | tuple[int, ...] | None = No
     >>> jnp.quantile(x, q, method='nearest')
     Array([2., 4., 7.], dtype=float32)
   """
-  check_arraylike("quantile", a, q)
   if weights is None:
     a, q = ensure_arraylike("quantile", a, q)
   else:
@@ -2390,7 +2389,7 @@ def quantile(a: ArrayLike, q: ArrayLike, axis: int | tuple[int, ...] | None = No
   if not isinstance(interpolation, DeprecatedArg):
     raise TypeError("quantile() argument interpolation was removed in JAX"
                     " v0.8.0. Use method instead.")
-  return _quantile(lax.asarray(a), lax.asarray(q), axis, method, keepdims, False, weights)
+  return _quantile(a, q, axis, method, keepdims, False, weights)
 
 # TODO(jakevdp): interpolation argument deprecated 2024-05-16
 @export
@@ -2439,7 +2438,6 @@ def nanquantile(a: ArrayLike, q: ArrayLike, axis: int | tuple[int, ...] | None =
     >>> jnp.nanquantile(x, q)
     Array([1.5, 3. , 4.5], dtype=float32)
   """
-  check_arraylike("nanquantile", a, q)
   if weights is None:
     a, q = ensure_arraylike("nanquantile", a, q)
   else:
@@ -2451,7 +2449,7 @@ def nanquantile(a: ArrayLike, q: ArrayLike, axis: int | tuple[int, ...] | None =
   if not isinstance(interpolation, DeprecatedArg):
     raise TypeError("nanquantile() argument interpolation was removed in JAX"
                     " v0.8.0. Use method instead.")
-  return _quantile(lax.asarray(a), lax.asarray(q), axis, method, keepdims, True, weights)
+  return _quantile(a, q, axis, method, keepdims, True, weights)
 
 def _quantile(a: Array, q: Array, axis: int | tuple[int, ...] | None,
               method: str, keepdims: bool, squash_nans: bool, weights: Array | None = None) -> Array:
@@ -2498,7 +2496,7 @@ def _quantile(a: Array, q: Array, axis: int | tuple[int, ...] | None,
   if weights is None:
     a, = promote_dtypes_inexact(a)
   else:
-    a, q = promote_dtypes_inexact(a, q)
+    a, weights = promote_dtypes_inexact(a, weights)
     a_shape = a.shape
     w_shape = np.shape(weights)
     if w_shape != a_shape:
@@ -2513,11 +2511,6 @@ def _quantile(a: Array, q: Array, axis: int | tuple[int, ...] | None,
       weights = lax.expand_dims(weights, axis)
     weights = _broadcast_to(weights, a.shape)
 
-    weights_have_nan = jnp.any(jnp.isnan(weights))
-    if weights_have_nan:
-      out_shape = q.shape if hasattr(q, "shape") and getattr(q, "ndim", 0) > 0 else ()
-      return lax.full(out_shape, np.nan, dtype=a.dtype)
-
     if squash_nans:
       nan_mask = ~lax_internal._isnan(a)
       weights = _where(nan_mask, weights, 0)
@@ -2530,7 +2523,7 @@ def _quantile(a: Array, q: Array, axis: int | tuple[int, ...] | None,
     cum_weights = lax.cumsum(weights_sorted, axis=axis)
     cum_weights_norm = lax.div(cum_weights, total_weight)
 
-    def _weighted_quantile(qi, weights_have_nan=weights_have_nan):
+    def _weighted_quantile(qi):
       index_dtype = dtypes.default_int_dtype()
       idx = sum(lax.lt(cum_weights_norm, qi), axis=axis, dtype=index_dtype)
       idx = lax.clamp(0, idx, a_sorted.shape[axis] - 1)
@@ -2558,9 +2551,6 @@ def _weighted_quantile(qi, weights_have_nan=weights_have_nan):
         out = val
       else:
         raise ValueError(f"{method=!r} not recognized")
-      if weights_have_nan:
-        out = lax.full_like(out, np.nan)
-        out = lax.squeeze(out, axis=axis)
       return out
 
     if q.ndim == 0:
@@ -2700,7 +2690,6 @@ def percentile(a: ArrayLike, q: ArrayLike,
     >>> jnp.percentile(x, q, method='nearest')
     Array([1., 3., 4.], dtype=float32)
   """
-  check_arraylike("percentile", a, q)
   if weights is None:
     a, q = ensure_arraylike("percentile", a, q)
   else:
@@ -2764,7 +2753,6 @@ def nanpercentile(a: ArrayLike, q: ArrayLike,
     >>> jnp.nanpercentile(x, q)
     Array([1.5, 3. , 4.5], dtype=float32)
   """
-  check_arraylike("nanpercentile", a, q)
   if weights is None:
     a, q = ensure_arraylike("nanpercentile", a, q)
   else:
diff --git a/tests/lax_numpy_reducers_test.py b/tests/lax_numpy_reducers_test.py
@@ -17,7 +17,6 @@
 from functools import partial
 import itertools
 import unittest
-import pytest
 
 from absl.testing import absltest
 from absl.testing import parameterized
@@ -764,51 +763,58 @@ def testPercentilePrecision(self):
     x = jnp.float64([1, 2, 3, 4, 7, 10])
     self.assertEqual(jnp.percentile(x, 50), 3.5)
 
-  def test_weighted_quantile_all_weights_one(self):
-    a = jnp.array([1, 2, 3, 4, 5], dtype=float)
-    weights = jnp.ones_like(a)
-    q = jnp.array([0.25, 0.5, 0.75])
-    result = jnp.quantile(a, q, axis=0, method="inverted_cdf", keepdims=False, squash_nans=False, weights=weights)
-    expected = np.quantile(np.array(a), np.array(q), axis=0, weights=np.array(weights), method="inverted_cdf")
-    np.testing.assert_allclose(np.array(result), expected, rtol=1e-6)
-
-  def test_weighted_quantile_multiple_q(self):
-    a = jnp.arange(10, dtype=float)
-    weights = jnp.ones_like(a)
-    q = jnp.array([0.25, 0.5, 0.75])
-    result = jnp.quantile(a, q, axis=0, method="inverted_cdf", keepdims=False, squash_nans=False, weights=weights)
-    expected = np.quantile(np.array(a), np.array(q), axis=0, weights=np.array(weights), method="inverted_cdf")
-    np.testing.assert_allclose(np.array(result), expected, rtol=1e-6)
-
-  def test_weighted_quantile_keepdims(self):
-    a = jnp.array([1, 2, 3, 4], dtype=float)
-    weights = jnp.array([1, 1, 1, 1], dtype=float)
-    q = 0.5
-    result = jnp.quantile(a, q, axis=0, method="inverted_cdf", keepdims=True, squash_nans=False, weights=weights)
-    expected = np.quantile(np.array(a), np.array(q), axis=0, keepdims=True, weights=np.array(weights), method="inverted_cdf")
-    np.testing.assert_allclose(np.array(result), expected, rtol=1e-6)
+  @jtu.sample_product(
+    [dict(a_shape=a_shape, axis=axis)
+      for a_shape, axis in (
+        ((7,), None),
+        ((6, 7,), None),
+        ((47, 7), 0),
+        ((47, 7), ()),
+        ((4, 101), 1),
+        ((4, 47, 7), (1, 2)),
+        ((4, 47, 7), (0, 2)),
+        ((4, 47, 7), (1, 0, 2)),
+      )
+    ],
+    a_dtype=default_dtypes,
+    q_dtype=[np.float32],
+    q_shape=scalar_shapes + [(1,), (4,)],
+    keepdims=[False, True],
+    method=['linear', 'lower', 'higher', 'nearest', 'midpoint', 'inverted_cdf'],
+  )
+  def testWeightedQuantile(self, a_shape, a_dtype, q_shape, q_dtype, axis, keepdims, method):
+    rng = jtu.rand_default(self.rng())
+    a = rng(a_shape, a_dtype)
+    q = jtu.rand_uniform(self.rng(), low=0.0, high=1.0)(q_shape, q_dtype)  # quantiles must lie in [0, 1]
+    if axis is None:
+      weights_shape = a_shape  # no reduction axis given: weights match a's full shape
+    else:
+      axes = axis if isinstance(axis, tuple) else (axis,)
+      weights_shape = tuple(a_shape[i] for i in axes)  # one weight dimension per reduced axis
+    weights = np.abs(rng(weights_shape, a_dtype)) + 1e-3  # strictly positive, so the weight sum is never zero
 
-  def test_weighted_quantile_linear(self):
-    a = jnp.array([1, 2, 3, 4, 5], dtype=float)
-    weights = jnp.array([1, 2, 1, 1, 1], dtype=float)
-    q = jnp.array([0.5])
-    result = jnp.quantile(a, q, axis=0, method="inverted_cdf", keepdims=False, squash_nans=False, weights=weights)
-    expected = np.quantile(np.array(a), np.array(q), axis=0, weights=np.array(weights), method="inverted_cdf")
-    np.testing.assert_allclose(np.array(result), expected, rtol=1e-6)
+    def np_fun(a, q, weights):
+      return np.quantile(np.array(a), np.array(q), axis=axis, weights=np.array(weights), method=method, keepdims=keepdims)
+    def jnp_fun(a, q, weights):
+      return jnp.quantile(a, q, axis=axis, weights=weights, method=method, keepdims=keepdims)
+    args_maker = lambda: [a, q, weights]
+    if method == 'inverted_cdf':  # np.quantile raises ValueError for weights with any other method
+      self._CheckAgainstNumpy(np_fun, jnp_fun, args_maker, tol=1e-6)
+    self._CompileAndCheck(jnp_fun, args_maker, rtol=1e-6)  # jit-vs-eager consistency is checked for every method
 
   def test_weighted_quantile_negative_weights(self):
     a = jnp.array([1, 2, 3, 4, 5], dtype=float)
     weights = jnp.array([1, -1, 1, 1, 1], dtype=float)
     q = jnp.array([0.5])
-    with pytest.raises(ValueError):
-        jnp.quantile(a, q, axis=0, method="linear", keepdims=False, squash_nans=False, weights=weights)
+    with self.assertRaisesRegex(ValueError, "Weights must be non-negative"):  # the -1 weight must be rejected
+      jnp.quantile(a, q, axis=0, method="linear", keepdims=False, weights=weights)  # squash_nans is internal to _quantile, not a public kwarg
 
   def test_weighted_quantile_all_weights_zero(self):
     a = jnp.array([1, 2, 3, 4, 5], dtype=float)
     weights = jnp.zeros_like(a)
     q = jnp.array([0.5])
-    with pytest.raises(ValueError):
-        jnp.quantile(a, q, axis=0, method="linear", keepdims=False, squash_nans=False, weights=weights)
+    with self.assertRaisesRegex(ValueError, "Sum of weights must not be zero"):  # all-zero weights cannot be normalized
+      jnp.quantile(a, q, axis=0, method="linear", keepdims=False, weights=weights)  # squash_nans is internal to _quantile, not a public kwarg
 
   def test_weighted_quantile_weights_with_nan(self):
     a = jnp.array([1, 2, 3, 4, 5], dtype=float)
@@ -825,15 +831,6 @@ def test_weighted_quantile_scalar_q(self):
     assert jnp.issubdtype(result.dtype, jnp.floating)
     assert result.shape == ()
 
-  def test_weighted_quantile_jit(self):
-    a = jnp.array([1, 2, 3, 4, 5], dtype=float)
-    weights = jnp.array([1, 2, 1, 1, 1], dtype=float)
-    q = jnp.array([0.25, 0.5, 0.75])
-    quantile_jit = jax.jit(lambda a, q, weights: jnp.quantile(a, q, axis=0, method="inverted_cdf", keepdims=False, squash_nans=False, weights=weights))
-    result = quantile_jit(a, q, weights)
-    expected = np.quantile(np.array(a), np.array(q), axis=0, weights=np.array(weights), method="inverted_cdf")
-    np.testing.assert_allclose(np.array(result), expected, rtol=1e-6)
-
   @jtu.sample_product(
     [dict(a_shape=a_shape, axis=axis)
       for a_shape, axis in (