Skip to content

Commit 0450215

Browse files
feat: update core packages (#1790)
* feat: upgrade support to python 3.13 * feat: remove python 3.9, as it has not been supported since last October * fix: remove display message * chore: fix the tests for the multiprocessing * fix(linting): code formatting * fix: histograms support Numpy 2.x * fix: histogram for numpy 2.2x * fix(linting): code formatting * fix: histogram safe error * chore: add Union typing fix * fix: improve hist compute logic * fix(linting): code formatting * fix: fix chi_squared function * fix: typing * fix: array optional typing * fix: change the logic to align with the typing * fix(linting): code formatting --------- Co-authored-by: Azory YData Bot <azory@ydata.ai>
1 parent b6117b5 commit 0450215

File tree

8 files changed

+123
-116
lines changed

8 files changed

+123
-116
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
matrix:
1818
os: [ ubuntu-22.04 ]
19-
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13" ]
19+
python-version: ["3.10", "3.11", "3.12", "3.13" ]
2020
pandas: [ "pandas>1.1" ]
2121
numpy: [ "numpy>=1.21" ]
2222
runs-on: ${{ matrix.os }}
@@ -65,7 +65,7 @@ jobs:
6565
strategy:
6666
matrix:
6767
os: [ubuntu-22.04]
68-
python-version: ["3.12"]
68+
python-version: ["3.13"]
6969
pandas: [ "pandas>1.1" ]
7070
numpy: [ "numpy>=1.21" ]
7171

pyproject.toml

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ package_name = "ydata-profiling"
1111

1212
[project]
1313
name = "ydata-profiling"
14-
requires-python = ">=3.7,<3.14"
14+
requires-python = ">=3.10,<3.14"
1515
authors = [
1616
{name = "YData Labs Inc", email = "opensource@ydata.ai"}
1717
]
@@ -32,43 +32,42 @@ classifiers = [
3232
"Topic :: Scientific/Engineering",
3333
"Framework :: IPython",
3434
"Programming Language :: Python :: 3",
35-
"Programming Language :: Python :: 3.7",
36-
"Programming Language :: Python :: 3.8",
37-
"Programming Language :: Python :: 3.9",
3835
"Programming Language :: Python :: 3.10",
3936
"Programming Language :: Python :: 3.11",
4037
"Programming Language :: Python :: 3.12",
38+
"Programming Language :: Python :: 3.13",
39+
"Programming Language :: Python :: 3.14",
4140
]
4241

4342
dependencies = [
44-
"scipy>=1.4.1, <1.16",
45-
"pandas>1.1, <3.0, !=1.4.0",
43+
"scipy>=1.8, <1.17",
44+
"pandas>1.5, <3.0, !=1.4.0",
4645
"matplotlib>=3.5, <=3.10",
47-
"pydantic>=2",
48-
"PyYAML>=5.0.0, <6.1",
49-
"jinja2>=2.11.1, <3.2",
46+
"pydantic>=2, <3",
47+
"PyYAML>=6.0.3, <6.1",
48+
"jinja2>=3.1.6, <3.2",
5049
"visions[type_image_path]>=0.7.5, <0.8.2",
51-
"numpy>=1.16.0,<2.2",
50+
"numpy>=1.22,<2.4",
5251
# Could be optional
5352
# Related to HTML report
5453
"minify-html>=0.15.0",
5554
"filetype>=1.0.0",
5655
# Correlations
57-
"phik>=0.11.1,<0.13",
56+
"phik>=0.12.5, <0.13",
5857
# Examples
59-
"requests>=2.24.0, <3",
58+
"requests>=2.32.0, <3",
6059
# Progress bar
61-
"tqdm>=4.48.2, <5",
60+
"tqdm>=4.66.3, <5",
6261
"seaborn>=0.10.1, <0.14",
6362
"multimethod>=1.4, <2",
6463
# metrics
6564
"statsmodels>=0.13.2, <1",
6665
# type checking
67-
"typeguard>=3, <5",
68-
"imagehash==4.3.1",
69-
"wordcloud>=1.9.3",
70-
"dacite>=1.8",
71-
"numba>=0.56.0, <=0.61",
66+
"typeguard>=4, <5",
67+
"imagehash==4.3.2",
68+
"wordcloud>=1.9.4",
69+
"dacite>=1.9, <2",
70+
"numba>=0.60,<0.63",
7271
]
7372

7473
dynamic = [

src/ydata_profiling/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from ydata_profiling.controller import pandas_decorator # isort:skip # noqa
1212
from ydata_profiling.profile_report import ProfileReport # isort:skip # noqa
1313
from ydata_profiling.version import __version__ # isort:skip # noqa
14-
from ydata_profiling.utils.information import display_banner
1514

1615
# backend
1716
import ydata_profiling.model.pandas # isort:skip # noqa
@@ -26,8 +25,6 @@
2625

2726
warnings.simplefilter("ignore", category=NumbaDeprecationWarning)
2827

29-
display_banner()
30-
3128
__all__ = [
3229
"pandas_decorator",
3330
"ProfileReport",

src/ydata_profiling/model/pandas/describe_categorical_pandas.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
series_handle_nulls,
1717
series_hashable,
1818
)
19-
from ydata_profiling.utils.information import DisplayInfo
2019

2120

2221
def get_character_counts_vc(vc: pd.Series) -> pd.Series:
@@ -270,11 +269,6 @@ def pandas_describe_categorical_1d(
270269

271270
if config.vars.cat.dirty_categories: # noqa: SIM102
272271
if not _displayed_catvar_banner:
273-
display_info = DisplayInfo(
274-
title="Identify dirty categories with ydata-sdk",
275-
info_text="This feature is only available for ydata-sdk users. Register to give try it.",
276-
)
277-
display_info.display_message()
278272
_displayed_catvar_banner = True
279273

280274
return config, series, summary

src/ydata_profiling/model/summary_algorithms.py

Lines changed: 95 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import functools
2-
from typing import Any, Callable, Optional, Tuple, TypeVar
2+
from typing import Any, Callable, Optional, Tuple, TypeVar, Union
33

44
import numpy as np
55
import pandas as pd
@@ -26,6 +26,41 @@ def inner(
2626
return inner
2727

2828

29+
def safe_histogram(
30+
values: np.ndarray,
31+
bins: Union[int, str, np.ndarray] = "auto",
32+
weights: Optional[np.ndarray] = None,
33+
density: bool = False,
34+
) -> Tuple[np.ndarray, np.ndarray]:
35+
"""
36+
Wrapper to avoid
37+
ValueError: Too many bins for data range. Cannot create N finite-sized bins.
38+
"""
39+
try:
40+
return np.histogram(values, bins=bins, weights=weights, density=density)
41+
except ValueError as exc:
42+
if "Too many bins for data range" in str(exc):
43+
try:
44+
return np.histogram(
45+
values, bins="auto", weights=weights, density=density
46+
)
47+
except ValueError:
48+
finite = values[np.isfinite(values)]
49+
if finite.size == 0:
50+
return np.array([]), np.array([])
51+
vmin = float(np.min(finite))
52+
vmax = float(np.max(finite))
53+
if vmin == vmax:
54+
eps = 0.5 if vmin == 0 else abs(vmin) * 0.5
55+
bin_edges = np.array([vmin - eps, vmin + eps])
56+
else:
57+
bin_edges = np.array([vmin, vmax])
58+
return np.histogram(
59+
values, bins=bin_edges, weights=weights, density=density
60+
)
61+
raise
62+
63+
2964
def histogram_compute(
3065
config: Settings,
3166
finite_values: np.ndarray,
@@ -36,27 +71,75 @@ def histogram_compute(
3671
stats = {}
3772
if len(finite_values) == 0:
3873
return {name: []}
74+
3975
hist_config = config.plot.histogram
40-
bins_arg = "auto" if hist_config.bins == 0 else min(hist_config.bins, n_unique)
41-
bins = np.histogram_bin_edges(finite_values, bins=bins_arg)
42-
if len(bins) > hist_config.max_bins:
43-
bins = np.histogram_bin_edges(finite_values, bins=hist_config.max_bins)
44-
weights = weights if weights and len(weights) == hist_config.max_bins else None
45-
46-
stats[name] = np.histogram(
47-
finite_values, bins=bins, weights=weights, density=config.plot.histogram.density
76+
77+
# Compute data range
78+
finite = finite_values[np.isfinite(finite_values)]
79+
vmin = float(np.min(finite))
80+
vmax = float(np.max(finite))
81+
data_range = vmax - vmin
82+
83+
# Choose of Bins based on observed data values
84+
if data_range == 0:
85+
eps = 0.5 if vmin == 0 else abs(vmin) * 0.1
86+
bins = np.array([vmin - eps, vmin + eps])
87+
else:
88+
requested_bins = hist_config.bins if hist_config.bins > 0 else "auto"
89+
90+
if isinstance(requested_bins, int):
91+
safe_bins = min(requested_bins, n_unique, hist_config.max_bins)
92+
93+
safe_bins = max(1, safe_bins)
94+
95+
bins = np.linspace(vmin, vmax, safe_bins + 1)
96+
else:
97+
bins = np.histogram_bin_edges(finite_values, bins="auto")
98+
if len(bins) - 1 > hist_config.max_bins:
99+
bins = np.linspace(vmin, vmax, hist_config.max_bins + 1)
100+
101+
hist = np.histogram(
102+
finite_values,
103+
bins=bins,
104+
weights=weights,
105+
density=hist_config.density,
48106
)
107+
108+
stats[name] = hist
49109
return stats
50110

51111

52112
def chi_square(
53-
values: Optional[np.ndarray] = None, histogram: Optional[np.ndarray] = None
113+
values: Optional[np.ndarray] = None,
114+
histogram: Optional[np.ndarray] = None,
54115
) -> dict:
116+
# Case 1: histogram not passed → we compute it
55117
if histogram is None:
56-
bins = np.histogram_bin_edges(values, bins="auto")
118+
if values is None:
119+
return {"statistic": 0, "pvalue": 0}
120+
121+
# Try NumPy "auto" binning (may fail under NumPy 2)
122+
try:
123+
bins = np.histogram_bin_edges(values, bins="auto")
124+
except ValueError:
125+
# Fallback: basic 1-bin histogram covering the min→max range
126+
finite = values[np.isfinite(values)]
127+
if finite.size == 0:
128+
return {"statistic": 0, "pvalue": 0}
129+
130+
vmin = float(finite.min())
131+
vmax = float(finite.max())
132+
if vmin == vmax:
133+
bins = np.array([vmin - 0.5, vmin + 0.5])
134+
else:
135+
bins = np.array([vmin, vmax])
136+
57137
histogram, _ = np.histogram(values, bins=bins)
58-
if len(histogram) == 0 or np.sum(histogram) == 0:
138+
139+
# Case 2: histogram exists but is empty
140+
if histogram.size == 0 or histogram.sum() == 0:
59141
return {"statistic": 0, "pvalue": 0}
142+
60143
return dict(chisquare(histogram)._asdict())
61144

62145

src/ydata_profiling/report/presentation/flavours/html/templates/sequence/overview_tabs.html

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
{% if tabs | length > 0 %}
2-
{% if oss %}
3-
<p class="text-body-secondary text-end">Brought to you by <a href="https://ydata.ai/?utm_source=opensource&utm_medium=ydataprofiling&utm_campaign=report">YData</a></p>
4-
{% endif %}
52
<div class="row item {% if classes %}{{ classes }}{% endif %}" {% if id %} id="{{ id }}"{% endif %}>
63
<ul class="nav nav-tabs tab-nav" role="tablist">
74
{% for tab in tabs %}

src/ydata_profiling/utils/information.py

Lines changed: 0 additions & 68 deletions
This file was deleted.

tests/unit/test_console.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
import os
22

33
import pytest
4+
import requests
45

56
from ydata_profiling.controller import console
67
from ydata_profiling.utils.paths import get_config
78

9+
NASA_URL = "https://data.nasa.gov/docs/legacy/meteorite_landings/Meteorite_Landings.csv"
10+
811

912
@pytest.fixture
1013
def console_data(get_data_file):
11-
return get_data_file(
12-
"meteorites.csv",
13-
"https://data.nasa.gov/docs/legacy/meteorite_landings/Meteorite_Landings.csv",
14-
)
14+
try:
15+
return get_data_file("meteorites.csv", NASA_URL)
16+
except requests.RequestException as e:
17+
pytest.skip(f"Skipping console tests: NASA dataset unavailable ({e})")
18+
except Exception as e:
19+
pytest.skip(f"Skipping console tests: cannot fetch meteorites.csv ({e})")
1520

1621

1722
@pytest.mark.skipif(os.name == "nt", reason="multiprocessing+pytest broken on Windows")

0 commit comments

Comments
 (0)