-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
ENH: Add custom descriptors (such as dtype, nunique, etc.) to Styler output #43894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 87 commits
9743099
8a0253e
74c418e
70535c5
2fbe569
7903723
6a2793c
d227914
7ca5002
f27f7ed
a1000a7
c44dcda
c4c9aaa
c22cf0d
0e0b46e
1f3bbec
021bc26
4ba3dff
7fee05d
baa3233
f4ad390
22b03e3
db214d8
771d056
24952ae
1d47d0f
566738d
230138a
b9ba9ea
ea2bba1
75de033
4c34bc2
68ee832
ca70491
7a1994e
91320f8
92c1941
aad0e16
61b24ed
d4c5715
aa0172f
84a814f
f06b727
131070f
a048122
4931f32
e9716f2
d983464
c08ef82
0f11a62
5c52957
17d181c
e5b4d3e
f98ba9f
0cc5502
c5b75cc
ac1384b
c948f11
afa8f10
60efbbb
9ef5b44
a02b4ac
6b51d88
814bace
33c35a4
91d8680
260d356
46856fb
a4dce4d
fefb420
f2c8f0e
a945da5
b3c0f96
f36d4fd
e78907f
cd085df
45f5879
6f64e80
1c5cf53
58aa894
bd2fe7d
02aeae5
5541396
beb2bc1
7d62bb3
6bfb5a9
9554f89
d8e11c8
4f935c4
7031897
0c034a6
6128ccb
bb8201e
44204e0
ebec1d6
c205c38
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1435,6 +1435,7 @@ def _copy(self, deepcopy: bool = False) -> Styler: | |
] | ||
deep = [ # nested lists or dicts | ||
"css", | ||
"descriptors", | ||
"_display_funcs", | ||
"_display_funcs_index", | ||
"_display_funcs_columns", | ||
|
@@ -1979,6 +1980,9 @@ def export(self) -> dict[str, Any]: | |
|
||
Can be applied to a second Styler with ``Styler.use``. | ||
|
||
.. versionchanged:: 1.5.0 | ||
Adds ``descriptors`` to the exported items. | ||
|
||
Returns | ||
------- | ||
styles : dict | ||
|
@@ -2000,6 +2004,7 @@ def export(self) -> dict[str, Any]: | |
- Whether axes and names are hidden from the display, if unambiguous. | ||
- Table attributes | ||
- Table styles | ||
- Descriptors | ||
|
||
The following attributes are considered data dependent and therefore not | ||
exported: | ||
|
@@ -2029,6 +2034,7 @@ def export(self) -> dict[str, Any]: | |
"hide_index_names": self.hide_index_names, | ||
"hide_column_names": self.hide_column_names, | ||
"css": copy.copy(self.css), | ||
"descriptors": copy.copy(self.descriptors), | ||
} | ||
|
||
def use(self, styles: dict[str, Any]) -> Styler: | ||
|
@@ -2037,6 +2043,9 @@ def use(self, styles: dict[str, Any]) -> Styler: | |
|
||
Possibly uses styles from ``Styler.export``. | ||
|
||
.. versionchanged:: 1.5.0 | ||
Adds ``descriptors`` to the used items. | ||
|
||
Parameters | ||
---------- | ||
styles : dict(str, Any) | ||
|
@@ -2054,6 +2063,8 @@ def use(self, styles: dict[str, Any]) -> Styler: | |
- "hide_index_names": whether index names are hidden. | ||
- "hide_column_names": whether column header names are hidden. | ||
- "css": the css class names used. | ||
- "descriptors": list of descriptors, typically added with | ||
``set_descriptors``. | ||
|
||
Returns | ||
------- | ||
|
@@ -2096,6 +2107,8 @@ def use(self, styles: dict[str, Any]) -> Styler: | |
self.hide_column_names = styles.get("hide_column_names", False) | ||
if styles.get("css"): | ||
self.css = styles.get("css") # type: ignore[assignment] | ||
if styles.get("descriptors"): | ||
self.set_descriptors(styles.get("descriptors")) | ||
return self | ||
|
||
def set_uuid(self, uuid: str) -> Styler: | ||
|
@@ -2354,7 +2367,10 @@ def set_table_styles( | |
"row_trim": "row_trim", | ||
"level": "level", | ||
"data": "data", | ||
"blank": "blank} | ||
"blank": "blank", | ||
"descriptor": "descriptor", | ||
"descriptor_name": "descriptor_name", | ||
"descriptor_value": "descriptor_value"} | ||
|
||
Examples | ||
-------- | ||
|
@@ -2425,6 +2441,46 @@ def set_table_styles( | |
self.table_styles = table_styles | ||
return self | ||
|
||
def set_descriptors( | ||
self, descriptors: list[str | Callable | tuple[str, Callable]] | None = None | ||
) -> Styler: | ||
""" | ||
Add header-level calculations to the output which describes the data. | ||
|
||
.. versionadded:: 1.5.0 | ||
|
||
Parameters | ||
---------- | ||
descriptors : list of str, callables or 2-tuples of str and callable | ||
If a string is given must be a valid Series method, e.g. "mean" invokes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add some additional details about the callable? Specifically, it must (or should) return a scalar quantity. Is the callable allowed to throw an exception? If so, what happens? It would be good to clarify this. |
||
Series.mean(). | ||
|
||
If a callable is given must accept a Series and return a scalar. No name | ||
will be displayed for the row. | ||
|
||
If a 2-tuple, must be a string used as the name of the row and a | ||
callable as above. | ||
|
||
Returns | ||
------- | ||
self : Styler | ||
|
||
Examples | ||
-------- | ||
|
||
>>> df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) | ||
>>> styler = df.style.set_descriptors([ | ||
... "mean", | ||
... Series.mean, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The screenshot shows no label on this line? Was it not possible to have |
||
... ("my-text", Series.mean), | ||
... ("my-func", lambda s: s.sum()/2), | ||
... ]) # doctest: +SKIP | ||
|
||
.. figure:: ../../_static/style/des_mean.png | ||
""" | ||
self.descriptors = descriptors if descriptors is not None else [] | ||
return self | ||
|
||
def set_na_rep(self, na_rep: str) -> StylerRenderer: | ||
""" | ||
Set the missing data representation on a ``Styler``. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -115,6 +115,9 @@ def __init__( | |
"level": "level", | ||
"data": "data", | ||
"blank": "blank", | ||
"descriptor": "descriptor", | ||
"descriptor_value": "descriptor_value", | ||
"descriptor_name": "descriptor_name", | ||
} | ||
|
||
# add rendering variables | ||
|
@@ -124,6 +127,7 @@ def __init__( | |
self.hide_columns_: list = [False] * self.columns.nlevels | ||
self.hidden_rows: Sequence[int] = [] # sequence for specific hidden rows/cols | ||
self.hidden_columns: Sequence[int] = [] | ||
self.descriptors: list[str | Callable | tuple[str, Callable]] = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this a common API? I think most places where alternative inputs are allowed either use a Sequence of some sort or Also, can't the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The Callable could technically return anything so long as the returned object has a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the sig should then be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you do stick with the tuple, it should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, agreed, having a think if this can be improved. |
||
self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list) | ||
self.ctx_index: DefaultDict[tuple[int, int], CSSList] = defaultdict(list) | ||
self.ctx_columns: DefaultDict[tuple[int, int], CSSList] = defaultdict(list) | ||
|
@@ -329,7 +333,9 @@ def _translate_header(self, sparsify_cols: bool, max_cols: int): | |
1) | .. | .. | .. | | ||
| index_blanks ... | column_name_n | column_headers (level_n) | | ||
+----------------------------+---------------+---------------------------+ | ||
2) | index_names (level_0 to level_n) ... | column_blanks ... | | ||
2) | index_blanks ... | descriptor | value by column | | ||
+----------------------------+---------------+---------------------------+ | ||
3) | index_names (level_0 to level_n) ... | column_blanks ... | | ||
+----------------------------+---------------+---------------------------+ | ||
|
||
Parameters | ||
|
@@ -365,7 +371,12 @@ def _translate_header(self, sparsify_cols: bool, max_cols: int): | |
) | ||
head.append(header_row) | ||
|
||
# 2) index names | ||
# 2) Descriptor calcs | ||
for r, descriptor in enumerate(self.descriptors): | ||
descriptor_row = self._generate_descriptor_row((r, descriptor), max_cols) | ||
bashtage marked this conversation as resolved.
Show resolved
Hide resolved
|
||
head.append(descriptor_row) | ||
|
||
# 3) index names | ||
if ( | ||
self.data.index.names | ||
and com.any_not_none(*self.data.index.names) | ||
|
@@ -477,6 +488,109 @@ def _generate_col_header_row(self, iter: tuple, max_cols: int, col_lengths: dict | |
|
||
return index_blanks + column_name + column_headers | ||
|
||
def _generate_descriptor_row(self, iter: tuple, max_cols: int): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What kind of tuples are allowed? Would be better to be as specific as possible to reduce any future refactor risks. |
||
""" | ||
Generate the row containing calculated descriptor values for columns: | ||
|
||
+----------------------------+---------------+---------------------------+ | ||
| index_blanks ... | descriptor_i | value_i by col | | ||
+----------------------------+---------------+---------------------------+ | ||
|
||
Parameters | ||
---------- | ||
iter : tuple | ||
Looping variables from outer scope | ||
max_cols : int | ||
Permissible number of columns | ||
|
||
Returns | ||
------- | ||
list of elements | ||
""" | ||
|
||
r, descriptor = iter | ||
|
||
# number of index blanks is governed by number of hidden index levels | ||
index_blanks = [ | ||
_element("th", self.css["blank"], self.css["blank_value"], True) | ||
] * (self.index.nlevels - sum(self.hide_index_) - 1) | ||
|
||
if isinstance(descriptor, str): | ||
name: str | None = descriptor | ||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
|
||
func: Callable = getattr(Series, descriptor) | ||
elif isinstance(descriptor, tuple): | ||
name, func = descriptor[0], descriptor[1] | ||
else: | ||
name, func = None, descriptor | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When the descriptor is a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
display_func: Callable = _maybe_wrap_formatter( | ||
formatter=None, # use _default_formatter | ||
bashtage marked this conversation as resolved.
Show resolved
Hide resolved
|
||
decimal=get_option("styler.format.decimal"), | ||
thousands=get_option("styler.format.thousands"), | ||
precision=get_option("styler.format.precision"), | ||
na_rep=get_option("styler.format.na_rep"), | ||
escape=get_option("styler.format.escape"), | ||
) | ||
|
||
base_css = f"{self.css['descriptor_name']} {self.css['descriptor']}{r}" | ||
descriptor_name = [ | ||
_element( | ||
"th", | ||
base_css | ||
if (name is not None and not self.hide_column_names) | ||
bashtage marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else f"{self.css['blank']} {base_css}", | ||
name | ||
if (name is not None and not self.hide_column_names) | ||
else self.css["blank_value"], | ||
not all(self.hide_index_), | ||
) | ||
] | ||
|
||
descriptor_values, visible_col_count = [], 0 | ||
for c, col in enumerate(self.columns): | ||
if c not in self.hidden_columns: | ||
header_element_visible = True | ||
visible_col_count += 1 | ||
try: | ||
header_element_value = func(self.data[col]) | ||
except Exception: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not be strict here and raise? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the interest of a simpler initial PR I did not include a subset argument, so the function needs to work on all columns. Some dtypes might not be suitable for the function, so the best approach is to fail silently, I think. Descriptors aim to be more helpful than precise, so I think raising when failing would be annoying. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. What do you think about raising a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think so, but even if it wasn't, the way to turn it off is difficult:
However, with you prompting me to think about it, probably means better documentation and better examples could highlight this and make the behaviour (and workarounds) clear. |
||
header_element_value = self.css["blank_value"] | ||
else: | ||
header_element_visible = False | ||
header_element_value = None | ||
|
||
if visible_col_count > max_cols: | ||
# add an extra column with `...` value to indicate trimming | ||
descriptor_values.append( | ||
_element( | ||
"th", | ||
( | ||
f"{self.css['descriptor_value']} " | ||
f"{self.css['descriptor']}{r} " | ||
f"{self.css['col_trim']}" | ||
), | ||
"...", | ||
True, | ||
attributes="", | ||
) | ||
) | ||
break | ||
|
||
header_element = _element( | ||
"th", | ||
( | ||
f"{self.css['descriptor_value']} {self.css['descriptor']}{r} " | ||
f"{self.css['col']}{c}" | ||
), | ||
header_element_value, | ||
header_element_visible, | ||
display_value=display_func(header_element_value), | ||
attributes="", | ||
) | ||
descriptor_values.append(header_element) | ||
|
||
return index_blanks + descriptor_name + descriptor_values | ||
|
||
def _generate_index_names_row(self, iter: tuple, max_cols: int, col_lengths: dict): | ||
""" | ||
Generate the row containing index names | ||
|
@@ -1418,7 +1532,7 @@ def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any: | |
""" | ||
if isinstance(x, (float, complex)): | ||
return f"{x:,.{precision}f}" if thousands else f"{x:.{precision}f}" | ||
elif isinstance(x, int): | ||
elif isinstance(x, (int, np.int64)): | ||
attack68 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return f"{x:,.0f}" if thousands else f"{x:.0f}" | ||
return x | ||
|
||
|
@@ -1433,7 +1547,7 @@ def _wrap_decimal_thousands( | |
""" | ||
|
||
def wrapper(x): | ||
if isinstance(x, (float, complex, int)): | ||
if isinstance(x, (float, complex, int, np.int64)): | ||
if decimal != "." and thousands is not None and thousands != ",": | ||
return ( | ||
formatter(x) | ||
|
Uh oh!
There was an error while loading. Please reload this page.