Skip to content

Commit ce762d5

Browse files
fix(eda.plot): fixed wordcloud, all nan column
1 parent c1fea00 commit ce762d5

File tree

2 files changed

+36
-10
lines changed

2 files changed

+36
-10
lines changed

dataprep/eda/distribution/compute/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,7 @@ def compute(
9494
or dtype = Continuous() or dtype = "Continuous" or dtype = Continuous()
9595
""" # pylint: disable=too-many-locals
9696

97+
df.columns = df.columns.astype(str)
9798
df = to_dask(df)
9899

99100
if not any((x, y, z)):

dataprep/eda/distribution/render.py

Lines changed: 35 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -255,20 +255,44 @@ def _sci_notation_superscript(value: str) -> str:
255255
return value
256256

257257

258+
def _empty_figure(title: str, plot_height: int, plot_width: int) -> Figure:
    """
    Build a blank, titled figure to show when there is no data to render
    """
    # Both ranges are empty lists, so the figure has nothing to lay out;
    # only the title and the background are meaningful here.
    options = dict(
        x_range=[],
        y_range=[],
        plot_height=plot_height,
        plot_width=plot_width,
        title=title,
        x_axis_location="below",
        tools="hover",
        toolbar_location=None,
        background_fill_color="#fafafa",
    )
    blank = Figure(**options)

    # A figure without any renderer makes bokeh report
    # error -1000 (MISSING_RENDERERS): Plot has no renderers,
    # so attach a zero-sized rect as a placeholder glyph.
    blank.rect(x=0, y=0, width=0, height=0)
    return blank
277+
278+
258279
def wordcloud_viz(word_cnts: pd.Series, plot_width: int, plot_height: int,) -> Panel:
259280
"""
260281
Visualize the word cloud
261282
""" # pylint: disable=unsubscriptable-object
262283
ellipse_mask = np.array(
263284
Image.open(f"{Path(__file__).parent.parent.parent}/assets/ellipse.jpg")
264285
)
265-
wordcloud = WordCloud(
266-
background_color="white", mask=ellipse_mask, width=800, height=400
267-
)
286+
wordcloud = WordCloud(background_color="white", mask=ellipse_mask)
268287
wordcloud.generate_from_frequencies(word_cnts)
269-
wcimg = wordcloud.to_array().astype(np.uint8)
270-
alpha = np.full([*wcimg.shape[:2], 1], 255, dtype=np.uint8)
271-
wcimg = np.concatenate([wcimg, alpha], axis=2)[::-1, :]
288+
wcarr = wordcloud.to_array().astype(np.uint8)
289+
290+
# use image_rgba following this example
291+
# https://docs.bokeh.org/en/latest/docs/gallery/image_rgba.html
292+
img = np.empty(wcarr.shape[:2], dtype=np.uint32)
293+
view = img.view(dtype=np.uint8).reshape((*wcarr.shape[:2], 4))
294+
alpha = np.full((*wcarr.shape[:2], 1), 255, dtype=np.uint8)
295+
view[:] = np.concatenate([wcarr, alpha], axis=2)[::-1]
272296

273297
fig = figure(
274298
plot_width=plot_width,
@@ -278,7 +302,7 @@ def wordcloud_viz(word_cnts: pd.Series, plot_width: int, plot_height: int,) -> P
278302
y_range=(0, 1),
279303
toolbar_location=None,
280304
)
281-
fig.image_rgba(image=[wcimg], x=0, y=0, dh=1, dw=1)
305+
fig.image_rgba(image=[img], x=0, y=0, dw=1, dh=1)
282306

283307
fig.axis.visible = False
284308
fig.grid.visible = False
@@ -368,7 +392,7 @@ def pie_viz(
368392
if nrows > npresent:
369393
df = df.append(pd.DataFrame({col: [nrows - npresent]}, index=["Others"]))
370394
df["pct"] = df[col] / nrows * 100
371-
df["angle"] = df[col] / npresent * 2 * np.pi
395+
df["angle"] = df[col] / nrows * 2 * np.pi
372396

373397
tooltips = [(col, "@index"), ("Count", f"@{col}"), ("Percent", "@pct{0.2f}%")]
374398
fig = Figure(
@@ -417,6 +441,8 @@ def hist_viz(
417441
"""
418442
# pylint: disable=too-many-arguments,too-many-locals
419443
counts, bins = hist
444+
if sum(counts) == 0:
445+
return _empty_figure(col, plot_height, plot_width)
420446
intvls = _format_bin_intervals(bins)
421447
df = pd.DataFrame(
422448
{
@@ -451,8 +477,7 @@ def hist_viz(
451477
fig.add_tools(hover)
452478
tweak_figure(fig, "hist", show_yticks)
453479
fig.yaxis.axis_label = "Frequency"
454-
if not df.empty:
455-
_format_axis(fig, df.iloc[0]["left"], df.iloc[-1]["right"], "x")
480+
_format_axis(fig, df.iloc[0]["left"], df.iloc[-1]["right"], "x")
456481
if show_yticks:
457482
fig.xaxis.axis_label = col
458483
if yscale == "linear":

0 commit comments

Comments
 (0)