diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9e7349a061295..4a6606ed20df2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -38,12 +38,7 @@ Other enhancements - Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`) - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) -- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) -- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) -- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) -- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) -- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) -- :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) +- Added new option ``display.integer_format`` for formatting integers in a :class:`DataFrame` with a thousands separator (``','`` or ``'_'``) (:issue:`57177`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 46c9139c3456c..38138ce9c663c 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -276,6 +276,12 @@ def use_numba_cb(key: str) -> None: df.info() is called. 
# Description of the ``integer_format`` option. The accepted values are
# spelled literally (',' and '_') because those are the strings the integer
# formatter checks for; names such as 'comma' are rejected at format time.
pc_integer_format_doc = """
: str or None
    Thousands separator used when integer values in a DataFrame are
    displayed. Must be None (no separator, the default), ',' or '_'.
    Any other value raises a ValueError when the integers are formatted.
"""
class _IntArrayFormatter(_GenericArrayFormatter):
    """
    Render integer values as strings, optionally with a thousands separator.

    The separator is read from ``self.integer_format`` and must be ``None``
    (no separator), ``","`` or ``"_"``.
    """

    def _format_strings(self) -> list[str]:
        """
        Format every element of ``self.values`` as a string.

        Returns
        -------
        list of str
            One entry per value, produced by ``self.formatter`` when one is
            set. Otherwise values are rendered as integers with the
            configured separator, and with a leading space in place of the
            plus sign unless ``self.leading_space`` is ``False``.

        Raises
        ------
        ValueError
            If ``self.integer_format`` is not ``None``, ``","`` or ``"_"``.
            The check runs even when a custom ``self.formatter`` is set.
        """
        delimiter = self.integer_format
        if delimiter not in (",", "_", None):
            raise ValueError("integer_format must be one of ',','_', or None")

        # Work on locals only: rendering must not rewrite the instance's
        # ``integer_format`` (None stays None for any later reader).
        sign = "" if self.leading_space is False else " "
        # ``d`` pins the integer presentation type, so the default (no
        # separator) spec is exactly "d" / " d".
        spec = f"{sign}{delimiter or ''}d"

        formatter = self.formatter or (lambda x: format(x, spec))
        return [formatter(x) for x in self.values]
class TestIntArrayFormatter:
    """Check how ``display.integer_format`` changes the repr of a DataFrame."""

    @staticmethod
    def _frame():
        # One integer and one float column, so each test also compares the
        # float column's rendering.
        return DataFrame({"A": [1000, 20000, 30], "B": [4.1, 50000.2, 600.0]})

    @staticmethod
    def _expected_repr(int_strings):
        # The expectation is a frame of pre-rendered strings; comparing
        # reprs checks the text that is actually displayed.
        expected = DataFrame({"A": int_strings, "B": ["4.1", "50000.2", "600.0"]})
        return repr(expected)

    @pytest.mark.parametrize(
        "delimiter, expected_a",
        [
            (",", ["1,000", "20,000", "30"]),
            ("_", ["1_000", "20_000", "30"]),
            (None, ["1000", "20000", "30"]),
        ],
        ids=["comma", "underscore", "empty"],
    )
    def test_format_delimiter(self, delimiter, expected_a):
        # Each accepted setting: ',' and '_' group thousands, None adds nothing.
        with option_context("display.integer_format", delimiter):
            result = repr(self._frame())
            expected = self._expected_repr(expected_a)

            assert result == expected

    def test_format_invalid_to_none(self):
        msg = "integer_format must be one of ',','_', or None"

        # '.' is accepted by the option itself (any str is) but rejected
        # when the integers are rendered.
        with option_context("display.integer_format", "."):
            df = self._frame()

            with pytest.raises(ValueError, match=msg):
                repr(df)

        # After the failed render, a valid setting formats normally again.
        with option_context("display.integer_format", None):
            result = repr(self._frame())
            expected = self._expected_repr(["1000", "20000", "30"])

            assert result == expected