From 0962c4a1b956503d77e0ac7d69db0efd6196b929 Mon Sep 17 00:00:00 2001 From: ccccjone Date: Tue, 5 Dec 2023 00:41:21 -0800 Subject: [PATCH 1/7] Improved test coverage for Styler.bar error conditions --- pandas/tests/io/formats/style/test_bar.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index 19884aaac86a7..c91cc3f214c62 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -305,3 +305,23 @@ def test_bar_value_error_raises(): msg = r"`height` must be a value in \[0, 100\]" with pytest.raises(ValueError, match=msg): df.style.bar(height=200).to_html() + + +def test_bar_color_and_cmap_error_raises(): + df = DataFrame({"A": [1, 2, 3, 4]}) + msg = "`color` and `cmap` cannot both be given" + # Test that providing both color and cmap raises a ValueError + with pytest.raises(ValueError, match=msg): + df.style.bar(color='#d65f5f', cmap='viridis').to_html() + + +def test_bar_invalid_color_type_error_raises(): + df = DataFrame({"A": [1, 2, 3, 4]}) + msg = r"`color` must be string or list or tuple of 2 strings,\(eg: color=\['#d65f5f', '#5fba7d'\]\)" + # Test that providing an invalid color type raises a ValueError + with pytest.raises(ValueError, match=msg): + df.style.bar(color=123).to_html() + + # Test that providing a color list with more than two elements raises a ValueError + with pytest.raises(ValueError, match=msg): + df.style.bar(color=['#d65f5f', '#5fba7d', '#abcdef']).to_html() \ No newline at end of file From 14e7b59965bb52d83df625d3a8c1f21cdb72c2b4 Mon Sep 17 00:00:00 2001 From: ccccjone Date: Tue, 5 Dec 2023 02:39:49 -0800 Subject: [PATCH 2/7] Fixed the code style issue causing test failure --- pandas/tests/io/formats/style/test_bar.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index c91cc3f214c62..6acb5d9c95154 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -317,11 +317,14 @@ def test_bar_color_and_cmap_error_raises(): def test_bar_invalid_color_type_error_raises(): df = DataFrame({"A": [1, 2, 3, 4]}) - msg = r"`color` must be string or list or tuple of 2 strings,\(eg: color=\['#d65f5f', '#5fba7d'\]\)" + msg = ( + r"`color` must be string or list or tuple of 2 strings," + r"\(eg: color=\['#d65f5f', '#5fba7d'\]\)" + ) # Test that providing an invalid color type raises a ValueError with pytest.raises(ValueError, match=msg): df.style.bar(color=123).to_html() # Test that providing a color list with more than two elements raises a ValueError with pytest.raises(ValueError, match=msg): - df.style.bar(color=['#d65f5f', '#5fba7d', '#abcdef']).to_html() \ No newline at end of file + df.style.bar(color=['#d65f5f', '#5fba7d', '#abcdef']).to_html() From e918b1cd33f33ab1a394e53bfcd7f56e52a6a1bb Mon Sep 17 00:00:00 2001 From: ccccjone Date: Wed, 6 Dec 2023 03:41:47 -0800 Subject: [PATCH 3/7] Fixed a Styler.bar bug of missing value --- pandas/io/formats/style.py | 38 +++++++++++++++++++++-- pandas/tests/io/formats/style/test_bar.py | 9 +++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 39d5b45862a8f..8e084cb883976 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -4081,9 +4081,43 @@ def css_calc(x, left: float, right: float, align: str, color: str | list | tuple else: return ret + def replace_pd_NA_with_np_nan(data_structure): + """ + Recursively replace pd.NA with np.nan in a nested list or array. + + This function traverses a nested structure (like lists or numpy arrays) + and replaces occurrences of Pandas' NA values (pd.NA) with NumPy's NaN values (np.nan). + It handles nested lists and arrays, applying the replacement recursively. + + Parameters: + data_structure (list, np.ndarray, or any): A nested list, numpy array, or any other object. + If it's a list or numpy array, the function will + process its elements recursively. + + Returns: + list, np.ndarray, or any: A new object with the same structure as `data_structure`, + where all pd.NA values have been replaced with np.nan. + The type of the returned object is the same as the input. + + Example: + >>> data = [1, pd.NA, [3, pd.NA, [pd.NA, 5]], pd.NA] + >>> replace_pd_NA_with_np_nan(data) + [1, nan, [3, nan, [nan, 5]], nan] + """ + if isinstance(data_structure, list): + # Process each item in the list recursively + return [replace_pd_NA_with_np_nan(element) for element in data_structure] + elif isinstance(data_structure, np.ndarray): + # Convert numpy array elements recursively + return np.array([replace_pd_NA_with_np_nan(element) for element in data_structure]) + else: + # Replace pd.NA with np.nan for individual elements + return np.nan if pd.isna(data_structure) else data_structure + values = data.to_numpy() - left = np.nanmin(values) if vmin is None else vmin - right = np.nanmax(values) if vmax is None else vmax + np_values = replace_pd_NA_with_np_nan(values) + left = np.nanmin(np_values) if vmin is None else vmin + right = np.nanmax(np_values) if vmax is None else vmax z: float = 0 # adjustment to translate data if align == "mid": diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index 6acb5d9c95154..56241809850b5 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame +from pandas import DataFrame, NA pytest.importorskip("jinja2") @@ -328,3 +328,10 @@ def test_bar_invalid_color_type_error_raises(): # Test that providing a color list with more than two elements raises a ValueError with pytest.raises(ValueError, match=msg): df.style.bar(color=['#d65f5f', '#5fba7d', '#abcdef']).to_html() + + +def test_styler_bar_with_missing_values(): + df = DataFrame({"A": [1, 2, NA, 4]}) + expected_substring = "linear-gradient" + html_output = df.style.bar(subset='A').to_html() + assert expected_substring in html_output From fdce1727c35db3622a591d2ea88a0b66e03a46af Mon Sep 17 00:00:00 2001 From: ccccjone Date: Wed, 6 Dec 2023 17:46:26 -0800 Subject: [PATCH 4/7] Adopted another tricky way to fix the bug --- pandas/io/formats/style.py | 39 ++--------------------- pandas/tests/io/formats/style/test_bar.py | 13 +++++--- 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 8e084cb883976..7d5c354aef002 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -4081,43 +4081,10 @@ def css_calc(x, left: float, right: float, align: str, color: str | list | tuple else: return ret - def replace_pd_NA_with_np_nan(data_structure): - """ - Recursively replace pd.NA with np.nan in a nested list or array. - - This function traverses a nested structure (like lists or numpy arrays) - and replaces occurrences of Pandas' NA values (pd.NA) with NumPy's NaN values (np.nan). - It handles nested lists and arrays, applying the replacement recursively. - - Parameters: - data_structure (list, np.ndarray, or any): A nested list, numpy array, or any other object. - If it's a list or numpy array, the function will - process its elements recursively. - - Returns: - list, np.ndarray, or any: A new object with the same structure as `data_structure`, - where all pd.NA values have been replaced with np.nan. - The type of the returned object is the same as the input. - - Example: - >>> data = [1, pd.NA, [3, pd.NA, [pd.NA, 5]], pd.NA] - >>> replace_pd_NA_with_np_nan(data) - [1, nan, [3, nan, [nan, 5]], nan] - """ - if isinstance(data_structure, list): - # Process each item in the list recursively - return [replace_pd_NA_with_np_nan(element) for element in data_structure] - elif isinstance(data_structure, np.ndarray): - # Convert numpy array elements recursively - return np.array([replace_pd_NA_with_np_nan(element) for element in data_structure]) - else: - # Replace pd.NA with np.nan for individual elements - return np.nan if pd.isna(data_structure) else data_structure - values = data.to_numpy() - np_values = replace_pd_NA_with_np_nan(values) - left = np.nanmin(np_values) if vmin is None else vmin - right = np.nanmax(np_values) if vmax is None else vmax + # A tricky way to address the issue where np.nanmin/np.nanmax fail to handle pd.NA. + left = np.nanmin(data.min(skipna=True)) if vmin is None else vmin + right = np.nanmax(data.max(skipna=True)) if vmax is None else vmax z: float = 0 # adjustment to translate data if align == "mid": diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index 56241809850b5..f53ddcb9939f2 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -330,8 +330,11 @@ def test_bar_invalid_color_type_error_raises(): df.style.bar(color=['#d65f5f', '#5fba7d', '#abcdef']).to_html() -def test_styler_bar_with_missing_values(): - df = DataFrame({"A": [1, 2, NA, 4]}) - expected_substring = "linear-gradient" - html_output = df.style.bar(subset='A').to_html() - assert expected_substring in html_output +def test_styler_bar_with_NA_values(): + df1 = DataFrame({"A": [1, 2, NA, 4]}) + df2 = DataFrame([[NA, NA], [NA, NA]]) + expected_substring = "style type=" + html_output1 = df1.style.bar(subset='A').to_html() + html_output2 = df2.style.bar(align="left", axis=None).to_html() + assert expected_substring in html_output1 + assert expected_substring in html_output2 From 33e60357cc8de2021aa2c2f3c061965815c1b9f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 03:01:33 +0000 Subject: [PATCH 5/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/tests/io/formats/style/test_bar.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index f53ddcb9939f2..6252816fe4be0 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, NA +from pandas import ( + NA, + DataFrame, +) pytest.importorskip("jinja2") @@ -312,7 +315,7 @@ def test_bar_color_and_cmap_error_raises(): msg = "`color` and `cmap` cannot both be given" # Test that providing both color and cmap raises a ValueError with pytest.raises(ValueError, match=msg): - df.style.bar(color='#d65f5f', cmap='viridis').to_html() + df.style.bar(color="#d65f5f", cmap="viridis").to_html() def test_bar_invalid_color_type_error_raises(): @@ -327,14 +330,14 @@ def test_bar_invalid_color_type_error_raises(): # Test that providing a color list with more than two elements raises a ValueError with pytest.raises(ValueError, match=msg): - df.style.bar(color=['#d65f5f', '#5fba7d', '#abcdef']).to_html() + df.style.bar(color=["#d65f5f", "#5fba7d", "#abcdef"]).to_html() def test_styler_bar_with_NA_values(): df1 = DataFrame({"A": [1, 2, NA, 4]}) df2 = DataFrame([[NA, NA], [NA, NA]]) expected_substring = "style type=" - html_output1 = df1.style.bar(subset='A').to_html() + html_output1 = df1.style.bar(subset="A").to_html() html_output2 = df2.style.bar(align="left", axis=None).to_html() assert expected_substring in html_output1 assert expected_substring in html_output2 From 13e976a1d41f02e959954fa0198bd8e5d73c358a Mon Sep 17 00:00:00 2001 From: ccccjone Date: Thu, 7 Dec 2023 16:48:16 -0800 Subject: [PATCH 6/7] Added a test for Styler.bar with pyarrow --- pandas/tests/io/formats/style/test_bar.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index 6252816fe4be0..dc6d8d2d0dcc5 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -1,9 +1,11 @@ import numpy as np import pytest +import io from pandas import ( NA, DataFrame, + read_csv, ) pytest.importorskip("jinja2") @@ -341,3 +343,15 @@ def test_styler_bar_with_NA_values(): html_output2 = df2.style.bar(align="left", axis=None).to_html() assert expected_substring in html_output1 assert expected_substring in html_output2 + + +def test_style_bar_with_pyarrow_NA_values(): + data = '''name,age,test1,test2,teacher + Adam,15,95.0,80,Ashby + Bob,16,81.0,82,Ashby + Dave,16,89.0,84,Jones + Fred,15,,88,Jones''' + df = read_csv(io.StringIO(data), dtype_backend="pyarrow") + expected_substring = "style type=" + html_output = df.style.bar(subset="test1").to_html() + assert expected_substring in html_output From dc77342bbcd4383f3d13e1b9a3a2d4a239a71c4d Mon Sep 17 00:00:00 2001 From: ccccjone Date: Fri, 8 Dec 2023 13:12:32 -0800 Subject: [PATCH 7/7] Updated with code style --- pandas/tests/io/formats/style/test_bar.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index dc6d8d2d0dcc5..b0e4712e8bb3d 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -1,6 +1,7 @@ +import io + import numpy as np import pytest -import io from pandas import ( NA, @@ -346,11 +347,11 @@ def test_styler_bar_with_NA_values(): def test_style_bar_with_pyarrow_NA_values(): - data = '''name,age,test1,test2,teacher + data = """name,age,test1,test2,teacher Adam,15,95.0,80,Ashby Bob,16,81.0,82,Ashby Dave,16,89.0,84,Jones - Fred,15,,88,Jones''' + Fred,15,,88,Jones""" df = read_csv(io.StringIO(data), dtype_backend="pyarrow") expected_substring = "style type=" html_output = df.style.bar(subset="test1").to_html()