-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: make Styler
compatible with non-unique indexes
#41269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
1cc569f
6982554
a3694db
5c6669c
732c7d5
4c99130
57e8bef
a7a2966
19fb7f9
4ce559e
7f28111
5043c01
9fc6cd3
09764ba
9451aae
4faeb29
aed0536
3a8f11e
51233be
8454c5e
c3b7af8
20cd19f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -322,6 +322,10 @@ def set_tooltips( | |
raise NotImplementedError( | ||
"Tooltips can only render with 'cell_ids' is True." | ||
) | ||
if not ttips.index.is_unique or not ttips.columns.is_unique: | ||
raise KeyError( | ||
"Tooltips render only if `ttips` has unique index and columns." | ||
) | ||
if self.tooltips is None: # create a default instance if necessary | ||
self.tooltips = Tooltips() | ||
self.tooltips.tt_data = ttips | ||
|
@@ -442,6 +446,10 @@ def set_td_classes(self, classes: DataFrame) -> Styler: | |
' </tbody>' | ||
'</table>' | ||
""" | ||
if not classes.index.is_unique or not classes.columns.is_unique: | ||
raise KeyError( | ||
"Classes render only if `classes` has unique index and columns." | ||
) | ||
classes = classes.reindex_like(self.data) | ||
|
||
for r, row_tup in enumerate(classes.itertuples()): | ||
|
@@ -464,13 +472,26 @@ def _update_ctx(self, attrs: DataFrame) -> None: | |
Whitespace shouldn't matter and the final trailing ';' shouldn't | ||
matter. | ||
""" | ||
err = KeyError( | ||
"`Styler.apply` and `.applymap` are not compatible with subset slices " | ||
"containing non-unique index or column keys." | ||
) | ||
|
||
for cn in attrs.columns: | ||
for rn, c in attrs[[cn]].itertuples(): | ||
if not c: | ||
continue | ||
css_list = maybe_convert_css_to_tuples(c) | ||
i, j = self.index.get_loc(rn), self.columns.get_loc(cn) | ||
self.ctx[(i, j)].extend(css_list) | ||
try: | ||
for rn, c in attrs[[cn]].itertuples(): | ||
if not c: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you limit the try/except so that it does not wrap the entire loop here, e.g. just put it around the `.get_loc` calls? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be even better to simply refuse to render non-uniques entirely. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I removed the separate cases, as suggested. |
||
continue | ||
css_list = maybe_convert_css_to_tuples(c) | ||
i, j = self.index.get_loc(rn), self.columns.get_loc(cn) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we instead avoid using the indices to look up locations, and just use indexers (e.g. iterate over the number of columns and use `iloc`)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. no, because |
||
self.ctx[(i, j)].extend(css_list) | ||
except ValueError as ve: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. want to be much more fine grained |
||
if "Styles supplied as string must follow CSS rule formats" in str(ve): | ||
raise ve # error is from maybe_convert_css_to_tuples() | ||
else: | ||
raise err # 'too many values to unpack': caught non-unique column | ||
except TypeError: | ||
raise err # 'unhashable type: slice' caught a non-unique index | ||
|
||
def _copy(self, deepcopy: bool = False) -> Styler: | ||
styler = Styler( | ||
|
@@ -986,10 +1007,11 @@ def set_table_styles( | |
|
||
table_styles = [ | ||
{ | ||
"selector": str(s["selector"]) + idf + str(obj.get_loc(key)), | ||
"selector": str(s["selector"]) + idf + str(idx), | ||
"props": maybe_convert_css_to_tuples(s["props"]), | ||
} | ||
for key, styles in table_styles.items() | ||
for idx in obj.get_indexer_for([key]) | ||
for s in styles | ||
] | ||
else: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,8 +82,6 @@ def __init__( | |
data = data.to_frame() | ||
if not isinstance(data, DataFrame): | ||
raise TypeError("``data`` must be a Series or DataFrame") | ||
if not data.index.is_unique or not data.columns.is_unique: | ||
raise ValueError("style is not supported for non-unique indices.") | ||
self.data: DataFrame = data | ||
self.index: Index = data.index | ||
self.columns: Index = data.columns | ||
|
@@ -495,9 +493,12 @@ def format( | |
escape=escape, | ||
) | ||
|
||
for row, value in data[[col]].itertuples(): | ||
i, j = self.index.get_loc(row), self.columns.get_loc(col) | ||
self._display_funcs[(i, j)] = format_func | ||
for row in data[[col]].itertuples(): | ||
i_ = self.index.get_indexer_for([row[0]]) # handle duplicate keys in | ||
j_ = self.columns.get_indexer_for([col]) # non-unique indexes | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can compute `j_` outside of the loop, right (as `col` doesn't change)? Does this change perf at all? (I don't think so, but checking.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good catch. The multiple loops really killed performance for the unique case (which is benchmarked).
So I had to separate out the non-unique and unique cases with a conditional; then performance was the same.
|
||
for i in i_: | ||
for j in j_: | ||
self._display_funcs[(i, j)] = format_func | ||
|
||
return self | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import pytest | ||
|
||
from pandas import ( | ||
DataFrame, | ||
IndexSlice, | ||
) | ||
|
||
pytest.importorskip("jinja2") | ||
|
||
from pandas.io.formats.style import Styler | ||
|
||
|
||
@pytest.fixture | ||
def df(): | ||
return DataFrame( | ||
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], | ||
index=["i", "j", "j"], | ||
columns=["c", "d", "d"], | ||
dtype=float, | ||
) | ||
|
||
|
||
@pytest.fixture | ||
def styler(df): | ||
return Styler(df, uuid_len=0) | ||
|
||
|
||
def test_format_non_unique(df): | ||
# GH 41269 | ||
|
||
# test dict | ||
html = df.style.format({"d": "{:.1f}"}).render() | ||
for val in ["1.000000<", "4.000000<", "7.000000<"]: | ||
assert val in html | ||
for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]: | ||
assert val in html | ||
|
||
# test subset | ||
html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).render() | ||
for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]: | ||
assert val in html | ||
for val in ["5.0<", "6.0<", "8.0<", "9.0<"]: | ||
assert val in html | ||
|
||
|
||
@pytest.mark.parametrize("func", ["apply", "applymap"]) | ||
def test_apply_applymap_non_unique_raises(df, func): | ||
# GH 41269 | ||
if func == "apply": | ||
op = lambda s: ["color: red;"] * len(s) | ||
else: | ||
op = lambda v: "color: red;" | ||
|
||
with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): | ||
# slice is non-unique on columns | ||
getattr(df.style, func)(op, subset=("i", "d"))._compute() | ||
|
||
with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): | ||
# slice is non-unique on rows | ||
getattr(df.style, func)(op, subset=("j", "c"))._compute() | ||
|
||
# unique subset OK | ||
getattr(df.style, func)(op, subset=("i", "c"))._compute() | ||
|
||
|
||
def test_table_styles_dict_non_unique_index(styler): | ||
styles = styler.set_table_styles( | ||
{"j": [{"selector": "td", "props": "a: v;"}]}, axis=1 | ||
).table_styles | ||
assert styles == [ | ||
{"selector": "td.row1", "props": [("a", "v")]}, | ||
{"selector": "td.row2", "props": [("a", "v")]}, | ||
] | ||
|
||
|
||
def test_table_styles_dict_non_unique_columns(styler): | ||
styles = styler.set_table_styles( | ||
{"d": [{"selector": "td", "props": "a: v;"}]}, axis=0 | ||
).table_styles | ||
assert styles == [ | ||
{"selector": "td.col1", "props": [("a", "v")]}, | ||
{"selector": "td.col2", "props": [("a", "v")]}, | ||
] | ||
|
||
|
||
def test_maybe_convert_css_raises(styler): | ||
# test _update_ctx() detects the right ValueError where non-unique columns present | ||
with pytest.raises(ValueError, match="Styles supplied as string must follow CSS"): | ||
styler.applymap(lambda x: "bad-css;")._compute() | ||
|
||
|
||
def test_tooltips_non_unique_raises(styler): | ||
# ttips has unique keys | ||
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) | ||
styler.set_tooltips(ttips=ttips) # OK | ||
|
||
# ttips has non-unique columns | ||
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) | ||
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): | ||
styler.set_tooltips(ttips=ttips) | ||
|
||
# ttips has non-unique index | ||
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) | ||
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): | ||
styler.set_tooltips(ttips=ttips) | ||
|
||
|
||
def test_set_td_classes_non_unique_raises(styler): | ||
# classes has unique keys | ||
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) | ||
styler.set_td_classes(classes=classes) # OK | ||
|
||
# classes has non-unique columns | ||
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) | ||
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): | ||
styler.set_td_classes(classes=classes) | ||
|
||
# classes has non-unique index | ||
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) | ||
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): | ||
styler.set_td_classes(classes=classes) | ||
|
||
|
||
def test_hide_columns_non_unique(styler): | ||
ctx = styler.hide_columns(["d"])._translate() | ||
|
||
assert ctx["head"][0][1]["display_value"] == "c" | ||
assert ctx["head"][0][1]["is_visible"] is True | ||
|
||
assert ctx["head"][0][2]["display_value"] == "d" | ||
assert ctx["head"][0][2]["is_visible"] is False | ||
|
||
assert ctx["head"][0][3]["display_value"] == "d" | ||
assert ctx["head"][0][3]["is_visible"] is False | ||
|
||
assert ctx["body"][0][1]["is_visible"] is True | ||
assert ctx["body"][0][2]["is_visible"] is False | ||
assert ctx["body"][0][3]["is_visible"] is False |
Uh oh!
There was an error while loading. Please reload this page.