diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04e148453387b..c6c7acf5b3823 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -70,6 +70,10 @@ repos: - id: rst-inline-touching-normal types: [text] # overwrite types: [rst] types_or: [python, rst] +- repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v0.2 + hooks: + - id: sphinx-lint - repo: https://github.com/asottile/yesqa rev: v1.3.0 hooks: diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index 61e3bcd44bea8..2fa6bf62ba80f 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -223,7 +223,7 @@ In some cases you may be tempted to use ``cast`` from the typing module when you ... else: # Reasonably only str objects would reach this but... obj = cast(str, obj) # Mypy complains without this! - return obj.upper() + return obj.upper() The limitation here is that while a human can reasonably understand that ``is_number`` would catch the ``int`` and ``float`` types mypy cannot make that same inference just yet (see `mypy #5206 `_. While the above works, the use of ``cast`` is **strongly discouraged**. Where applicable a refactor of the code to appease static analysis is preferable diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst index fb27d07cfb18f..c881770aa7584 100644 --- a/doc/source/development/contributing_environment.rst +++ b/doc/source/development/contributing_environment.rst @@ -85,10 +85,10 @@ You will need `Build Tools for Visual Studio 2019 `_. .. warning:: - You DO NOT need to install Visual Studio 2019. - You only need "Build Tools for Visual Studio 2019" found by - scrolling down to "All downloads" -> "Tools for Visual Studio 2019". - In the installer, select the "C++ build tools" workload. + You DO NOT need to install Visual Studio 2019. 
+ You only need "Build Tools for Visual Studio 2019" found by + scrolling down to "All downloads" -> "Tools for Visual Studio 2019". + In the installer, select the "C++ build tools" workload. You can install the necessary components on the commandline using `vs_buildtools.exe `_: diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 15fa58f8d804a..256c3ee36e80c 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -540,7 +540,7 @@ Pandas-Genomics provides extension types, extension arrays, and extension access `Pint-Pandas`_ ~~~~~~~~~~~~~~ -``Pint-Pandas `` provides an extension type for +`Pint-Pandas `_ provides an extension type for storing numeric arrays with units. These arrays can be stored inside pandas' Series and DataFrame. Operations between Series and DataFrame columns which use pint's extension array are then units aware. @@ -548,7 +548,7 @@ use pint's extension array are then units aware. `Text Extensions for Pandas`_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``Text Extensions for Pandas `` +`Text Extensions for Pandas `_ provides extension types to cover common data structures for representing natural language data, plus library integrations that convert the outputs of popular natural language processing libraries into Pandas DataFrames. diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst index ba8ef0d86d130..571f8980070af 100644 --- a/doc/source/user_guide/dsintro.rst +++ b/doc/source/user_guide/dsintro.rst @@ -678,7 +678,7 @@ Boolean operators operate element-wise as well: Transposing ~~~~~~~~~~~ -To transpose, access the ``T`` attribute or :meth:`DataFrame.transpose``, +To transpose, access the ``T`` attribute or :meth:`DataFrame.transpose`, similar to an ndarray: .. 
ipython:: python diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index bc772b5dab66c..f381d72069775 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -539,19 +539,19 @@ Some common aggregating functions are tabulated below: :widths: 20, 80 :delim: ; - :meth:`~pd.core.groupby.DataFrameGroupBy.mean`;Compute mean of groups - :meth:`~pd.core.groupby.DataFrameGroupBy.sum`;Compute sum of group values - :meth:`~pd.core.groupby.DataFrameGroupBy.size`;Compute group sizes - :meth:`~pd.core.groupby.DataFrameGroupBy.count`;Compute count of group - :meth:`~pd.core.groupby.DataFrameGroupBy.std`;Standard deviation of groups - :meth:`~pd.core.groupby.DataFrameGroupBy.var`;Compute variance of groups - :meth:`~pd.core.groupby.DataFrameGroupBy.sem`;Standard error of the mean of groups - :meth:`~pd.core.groupby.DataFrameGroupBy.describe`;Generates descriptive statistics - :meth:`~pd.core.groupby.DataFrameGroupBy.first`;Compute first of group values - :meth:`~pd.core.groupby.DataFrameGroupBy.last`;Compute last of group values - :meth:`~pd.core.groupby.DataFrameGroupBy.nth`;Take nth value, or a subset if n is a list - :meth:`~pd.core.groupby.DataFrameGroupBy.min`;Compute min of group values - :meth:`~pd.core.groupby.DataFrameGroupBy.max`;Compute max of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.mean`;Compute mean of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.sum`;Compute sum of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.size`;Compute group sizes + :meth:`~pd.core.groupby.DataFrameGroupBy.count`;Compute count of group + :meth:`~pd.core.groupby.DataFrameGroupBy.std`;Standard deviation of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.var`;Compute variance of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.sem`;Standard error of the mean of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.describe`;Generates descriptive statistics + 
:meth:`~pd.core.groupby.DataFrameGroupBy.first`;Compute first of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.last`;Compute last of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.nth`;Take nth value, or a subset if n is a list + :meth:`~pd.core.groupby.DataFrameGroupBy.min`;Compute min of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.max`;Compute max of group values The aggregating functions above will exclude NA values. Any function which diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 3a8583d395cc4..4ed71913d7b4d 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5695,9 +5695,9 @@ for an explanation of how the database connection is handled. .. warning:: - When you open a connection to a database you are also responsible for closing it. - Side effects of leaving a connection open may include locking the database or - other breaking behaviour. + When you open a connection to a database you are also responsible for closing it. + Side effects of leaving a connection open may include locking the database or + other breaking behaviour. Writing DataFrames '''''''''''''''''' diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index b524205ed7679..582620d8b6479 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2405,9 +2405,9 @@ you can use the ``tz_convert`` method. .. warning:: - Be wary of conversions between libraries. For some time zones, ``pytz`` and ``dateutil`` have different - definitions of the zone. This is more of a problem for unusual time zones than for - 'standard' zones like ``US/Eastern``. + Be wary of conversions between libraries. For some time zones, ``pytz`` and ``dateutil`` have different + definitions of the zone. This is more of a problem for unusual time zones than for + 'standard' zones like ``US/Eastern``. .. 
warning:: diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index f8c1f89be5d41..2407fd3113830 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -624,13 +624,13 @@ average of ``3, NaN, 5`` would be calculated as .. math:: - \frac{(1-\alpha)^2 \cdot 3 + 1 \cdot 5}{(1-\alpha)^2 + 1}. + \frac{(1-\alpha)^2 \cdot 3 + 1 \cdot 5}{(1-\alpha)^2 + 1}. Whereas if ``ignore_na=True``, the weighted average would be calculated as .. math:: - \frac{(1-\alpha) \cdot 3 + 1 \cdot 5}{(1-\alpha) + 1}. + \frac{(1-\alpha) \cdot 3 + 1 \cdot 5}{(1-\alpha) + 1}. The :meth:`~Ewm.var`, :meth:`~Ewm.std`, and :meth:`~Ewm.cov` functions have a ``bias`` argument, specifying whether the result should contain biased or unbiased statistics. diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index fc2b070df4392..04506f1655c7d 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -462,15 +462,15 @@ Rolling/expanding moments improvements .. code-block:: ipython - In [51]: ewma(s, com=3., min_periods=2) - Out[51]: - 0 NaN - 1 NaN - 2 1.000000 - 3 1.000000 - 4 1.571429 - 5 2.189189 - dtype: float64 + In [51]: pd.ewma(s, com=3., min_periods=2) + Out[51]: + 0 NaN + 1 NaN + 2 1.000000 + 3 1.000000 + 4 1.571429 + 5 2.189189 + dtype: float64 New behavior (note values start at index ``4``, the location of the 2nd (since ``min_periods=2``) non-empty value): @@ -557,21 +557,21 @@ Rolling/expanding moments improvements .. 
code-block:: ipython - In [89]: ewmvar(s, com=2., bias=False) - Out[89]: - 0 -2.775558e-16 - 1 3.000000e-01 - 2 9.556787e-01 - 3 3.585799e+00 - dtype: float64 - - In [90]: ewmvar(s, com=2., bias=False) / ewmvar(s, com=2., bias=True) - Out[90]: - 0 1.25 - 1 1.25 - 2 1.25 - 3 1.25 - dtype: float64 + In [89]: pd.ewmvar(s, com=2., bias=False) + Out[89]: + 0 -2.775558e-16 + 1 3.000000e-01 + 2 9.556787e-01 + 3 3.585799e+00 + dtype: float64 + + In [90]: pd.ewmvar(s, com=2., bias=False) / pd.ewmvar(s, com=2., bias=True) + Out[90]: + 0 1.25 + 1 1.25 + 2 1.25 + 3 1.25 + dtype: float64 Note that entry ``0`` is approximately 0, and the debiasing factors are a constant 1.25. By comparison, the following 0.15.0 results have a ``NaN`` for entry ``0``, diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst index 3db00f686d62c..f873d320822ae 100644 --- a/doc/source/whatsnew/v0.18.1.rst +++ b/doc/source/whatsnew/v0.18.1.rst @@ -149,8 +149,8 @@ can return a valid boolean indexer or anything which is valid for these indexer' # callable returns list of labels df.loc[lambda x: [1, 2], lambda x: ["A", "B"]] -Indexing with``[]`` -""""""""""""""""""" +Indexing with ``[]`` +"""""""""""""""""""" Finally, you can use a callable in ``[]`` indexing of Series, DataFrame and Panel. 
The callable must return a valid input for ``[]`` indexing depending on its diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 340e1ce9ee1ef..a2bb935c708bc 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1553,7 +1553,7 @@ Bug fixes - Bug in invalid datetime parsing in ``to_datetime`` and ``DatetimeIndex`` may raise ``TypeError`` rather than ``ValueError`` (:issue:`11169`, :issue:`11287`) - Bug in ``Index`` created with tz-aware ``Timestamp`` and mismatched ``tz`` option incorrectly coerces timezone (:issue:`13692`) - Bug in ``DatetimeIndex`` with nanosecond frequency does not include timestamp specified with ``end`` (:issue:`13672`) -- Bug in ```Series`` when setting a slice with a ``np.timedelta64`` (:issue:`14155`) +- Bug in ``Series`` when setting a slice with a ``np.timedelta64`` (:issue:`14155`) - Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`) - Bug in ``Index`` may ignore specified ``datetime64`` or ``timedelta64`` passed as ``dtype`` (:issue:`13981`) - Bug in ``RangeIndex`` can be created without no arguments rather than raises ``TypeError`` (:issue:`13793`) diff --git a/doc/source/whatsnew/v0.21.1.rst b/doc/source/whatsnew/v0.21.1.rst index 090a988d6406a..e217e1a75efc5 100644 --- a/doc/source/whatsnew/v0.21.1.rst +++ b/doc/source/whatsnew/v0.21.1.rst @@ -125,7 +125,7 @@ Indexing IO ^^ -- Bug in class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects. +- Bug in :class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). 
Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects. - Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`) - Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`) - Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index be84c562b3c32..9f24bc8e8ec50 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -1126,7 +1126,7 @@ Removal of prior version deprecations/changes - The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) - The options ``display.line_with`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) - The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) -- The ``flavor`` parameter have been removed from func:`to_sql` method (:issue:`13611`) +- The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`) - The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) - The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18).
Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e89e2f878fc24..e4dd6fa091d80 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1121,7 +1121,7 @@ Indexing - Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). - Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) - Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`) -- Fixed a ``KeyError`` when indexing a :class:`MultiIndex`` level with a list containing exactly one label, which is missing (:issue:`27148`) +- Fixed a ``KeyError`` when indexing a :class:`MultiIndex` level with a list containing exactly one label, which is missing (:issue:`27148`) - Bug which produced ``AttributeError`` on partial matching :class:`Timestamp` in a :class:`MultiIndex` (:issue:`26944`) - Bug in :class:`Categorical` and :class:`CategoricalIndex` with :class:`Interval` values when using the ``in`` operator (``__contains``) with objects that are not comparable to the values in the ``Interval`` (:issue:`23705`) - Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` on a :class:`DataFrame` with a single timezone-aware datetime64[ns] column incorrectly returning a scalar instead of a :class:`Series` (:issue:`27110`) diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst index 1b947030ab8ab..1ee6a9899a655 100644 --- a/doc/source/whatsnew/v0.7.0.rst +++ b/doc/source/whatsnew/v0.7.0.rst @@ -190,11 +190,11 @@ been added: :header: "Method","Description" :widths: 40,60 - ``Series.iget_value(i)``, Retrieve value stored at location ``i`` - ``Series.iget(i)``, Alias for 
``iget_value`` - ``DataFrame.irow(i)``, Retrieve the ``i``-th row - ``DataFrame.icol(j)``, Retrieve the ``j``-th column - "``DataFrame.iget_value(i, j)``", Retrieve the value at row ``i`` and column ``j`` + ``Series.iget_value(i)``, Retrieve value stored at location ``i`` + ``Series.iget(i)``, Alias for ``iget_value`` + ``DataFrame.irow(i)``, Retrieve the ``i``-th row + ``DataFrame.icol(j)``, Retrieve the ``j``-th column + "``DataFrame.iget_value(i, j)``", Retrieve the value at row ``i`` and column ``j`` API tweaks regarding label-based slicing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.9.1.rst b/doc/source/whatsnew/v0.9.1.rst index 6b05e5bcded7e..a5c3860a895a2 100644 --- a/doc/source/whatsnew/v0.9.1.rst +++ b/doc/source/whatsnew/v0.9.1.rst @@ -54,44 +54,44 @@ New features - DataFrame has new ``where`` and ``mask`` methods to select values according to a given boolean mask (:issue:`2109`, :issue:`2151`) - DataFrame currently supports slicing via a boolean vector the same length as the DataFrame (inside the ``[]``). - The returned DataFrame has the same number of columns as the original, but is sliced on its index. + DataFrame currently supports slicing via a boolean vector the same length as the DataFrame (inside the ``[]``). + The returned DataFrame has the same number of columns as the original, but is sliced on its index. .. ipython:: python - df = DataFrame(np.random.randn(5, 3), columns = ['A','B','C']) + df = pd.DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C']) - df + df - df[df['A'] > 0] + df[df['A'] > 0] - If a DataFrame is sliced with a DataFrame based boolean condition (with the same size as the original DataFrame), - then a DataFrame the same size (index and columns) as the original is returned, with - elements that do not meet the boolean condition as ``NaN``. This is accomplished via - the new method ``DataFrame.where``. In addition, ``where`` takes an optional ``other`` argument for replacement. 
+ If a DataFrame is sliced with a DataFrame based boolean condition (with the same size as the original DataFrame), + then a DataFrame the same size (index and columns) as the original is returned, with + elements that do not meet the boolean condition as ``NaN``. This is accomplished via + the new method ``DataFrame.where``. In addition, ``where`` takes an optional ``other`` argument for replacement. - .. ipython:: python + .. ipython:: python - df[df>0] + df[df > 0] - df.where(df>0) + df.where(df > 0) - df.where(df>0,-df) + df.where(df > 0, -df) - Furthermore, ``where`` now aligns the input boolean condition (ndarray or DataFrame), such that partial selection - with setting is possible. This is analogous to partial setting via ``.ix`` (but on the contents rather than the axis labels) + Furthermore, ``where`` now aligns the input boolean condition (ndarray or DataFrame), such that partial selection + with setting is possible. This is analogous to partial setting via ``.ix`` (but on the contents rather than the axis labels) - .. ipython:: python + .. ipython:: python - df2 = df.copy() - df2[ df2[1:4] > 0 ] = 3 - df2 + df2 = df.copy() + df2[df2[1:4] > 0] = 3 + df2 - ``DataFrame.mask`` is the inverse boolean operation of ``where``. + ``DataFrame.mask`` is the inverse boolean operation of ``where``. - .. ipython:: python + .. ipython:: python - df.mask(df<=0) + df.mask(df <= 0) - Enable referencing of Excel columns by their column names (:issue:`1936`) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 03dfe475475a1..2ab0af46cda88 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -525,7 +525,7 @@ Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead. 
a.to_numpy(dtype="float", na_value=np.nan) -**Reductions can return ``pd.NA``** +**Reductions can return** ``pd.NA`` When performing a reduction such as a sum with ``skipna=False``, the result will now be ``pd.NA`` instead of ``np.nan`` in presence of missing values diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index ebd76d97e78b3..e1f54c439ae9b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -665,9 +665,9 @@ the previous index (:issue:`32240`). In [4]: result Out[4]: min_val - 0 x - 1 y - 2 z + 0 x + 1 y + 2 z *New behavior*: diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 87982a149054c..52aa9312d4c14 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -826,7 +826,7 @@ Datetimelike - Bug in :meth:`Timestamp.to_pydatetime` failing to retain the ``fold`` attribute (:issue:`45087`) - Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`) - Fixed regression in :meth:`~Series.reindex` raising an error when using an incompatible fill value with a datetime-like dtype (or not raising a deprecation warning for using a ``datetime.date`` as fill value) (:issue:`42921`) -- Bug in :class:`DateOffset`` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`) +- Bug in :class:`DateOffset` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`) - Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`) - Bug in :class:`DataFrame` construction from dict of :class:`Series` with mismatched index dtypes sometimes raising depending on the ordering of the passed dict (:issue:`44091`) - Bug in :class:`Timestamp` hashing during some DST transitions caused a segmentation fault 
(:issue:`33931` and :issue:`40817`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 76511cb3eb48c..6986c04ae8d37 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1658,13 +1658,13 @@ def value_counts( ... }) >>> df - gender education country - 0 male low US - 1 male medium FR - 2 female high US - 3 male low FR - 4 female high FR - 5 male low FR + gender education country + 0 male low US + 1 male medium FR + 2 female high US + 3 male low FR + 4 female high FR + 5 male low FR >>> df.groupby('gender').value_counts() gender education country diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 5472bd99fa746..0ac0fe23bbbb7 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -699,7 +699,7 @@ def test_read_csv_and_table_sys_setprofile(all_parsers, read_func): def test_first_row_bom(all_parsers): # see gh-26545 parser = all_parsers - data = '''\ufeff"Head1" "Head2" "Head3"''' + data = '''\ufeff"Head1"\t"Head2"\t"Head3"''' result = parser.read_csv(StringIO(data), delimiter="\t") expected = DataFrame(columns=["Head1", "Head2", "Head3"]) @@ -710,7 +710,7 @@ def test_first_row_bom(all_parsers): def test_first_row_bom_unquoted(all_parsers): # see gh-36343 parser = all_parsers - data = """\ufeffHead1 Head2 Head3""" + data = """\ufeffHead1\tHead2\tHead3""" result = parser.read_csv(StringIO(data), delimiter="\t") expected = DataFrame(columns=["Head1", "Head2", "Head3"]) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 0cd4f9c02f69f..73e563fd2b743 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -209,9 +209,9 @@ def test_read_clipboard_infer_excel(self, request, mock_clipboard): text = dedent( """ - John James Charlie Mingus - 1 2 - 4 Harry Carney + John James\tCharlie Mingus + 1\t2 + 4\tHarry 
Carney """.strip() ) mock_clipboard[request.node.name] = text