Skip to content

Commit e8b02e1

Browse files
committed
Merge branch 'master' into categorical_map_na_Action
2 parents dc9d119 + 525f1ef commit e8b02e1

File tree

34 files changed

+461
-144
lines changed

34 files changed

+461
-144
lines changed

.github/workflows/wheels.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ jobs:
173173
pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
174174
cd .. # Not a good idea to test within the src tree
175175
python -c "import pandas; print(pandas.__version__);
176-
pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2', '--no-strict-data-files']);
177-
pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])"
176+
pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']);
177+
pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])"
178178
- uses: actions/upload-artifact@v3
179179
with:
180180
name: sdist

ci/test_wheels.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,10 @@
4141
multi_args = [
4242
"-m not clipboard and not single_cpu and not slow and not network and not db",
4343
"-n 2",
44-
"--no-strict-data-files",
4544
]
4645
pd.test(extra_args=multi_args)
4746
pd.test(
4847
extra_args=[
4948
"-m not clipboard and single_cpu and not slow and not network and not db",
50-
"--no-strict-data-files",
5149
]
5250
)

ci/test_wheels_windows.bat

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
set test_command=import pandas as pd; print(pd.__version__); ^
2-
pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--no-strict-data-files', '-n=2']); ^
3-
pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])
2+
pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']); ^
3+
pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])
44

55
python --version
66
pip install pytz six numpy python-dateutil tzdata>=2022.1

doc/source/development/contributing_codebase.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -812,7 +812,8 @@ install pandas) by typing::
812812
your installation is probably fine and you can start contributing!
813813

814814
Often it is worth running only a subset of tests first around your changes before running the
815-
entire suite.
815+
entire suite (tip: you can use the `pandas-coverage app <https://pandas-coverage.herokuapp.com/>`_
816+
to find out which tests hit the lines of code you've modified, and then run only those).
816817

817818
The easiest way to do this is with::
818819

doc/source/whatsnew/v2.1.0.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,15 +105,19 @@ Deprecations
105105
~~~~~~~~~~~~
106106
- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
107107
- Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
108+
- Deprecated :meth:`.GroupBy.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)
108109
- Deprecated pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
109110
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
110111
- Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`)
111112
- Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
113+
- Deprecated :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)
112114
- Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
113115
- Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
114116
- Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`)
115117
- Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)
118+
- Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`)
116119
- Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
120+
- Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
117121
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`.GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
118122
-
119123

@@ -206,6 +210,7 @@ MultiIndex
206210

207211
I/O
208212
^^^
213+
- Bug in :func:`read_html` where tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
209214
- :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
210215
-
211216

@@ -231,6 +236,7 @@ Groupby/resample/rolling
231236
grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
232237
or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
233238
the function operated on the whole index rather than each element of the index. (:issue:`51979`)
239+
- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`)
234240
-
235241

236242
Reshaping
@@ -257,6 +263,7 @@ Styler
257263
Other
258264
^^^^^
259265
- Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
266+
- Bug in :meth:`Series.memory_usage` when ``deep=True`` throwing an error with a :class:`Series` of objects and returning an incorrect value, as it did not take into account GC corrections (:issue:`51858`)
260267

261268
.. ***DO NOT USE THIS SECTION***
262269

pandas/_libs/lib.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from collections import abc
22
from decimal import Decimal
33
from enum import Enum
4+
from sys import getsizeof
45
from typing import (
56
Literal,
67
_GenericAlias,
@@ -159,7 +160,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t:
159160

160161
n = len(arr)
161162
for i in range(n):
162-
size += arr[i].__sizeof__()
163+
size += getsizeof(arr[i])
163164
return size
164165

165166

pandas/_libs/tslibs/offsets.pyx

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2546,7 +2546,6 @@ cdef class MonthEnd(MonthOffset):
25462546
DateOffset of one month end.
25472547
25482548
MonthEnd goes to the next date which is an end of the month.
2549-
To get the end of the current month pass the parameter n equals 0.
25502549
25512550
See Also
25522551
--------
@@ -2562,10 +2561,10 @@ cdef class MonthEnd(MonthOffset):
25622561
>>> ts + pd.offsets.MonthEnd()
25632562
Timestamp('2022-02-28 00:00:00')
25642563
2565-
If you want to get the end of the current month pass the parameter n equals 0:
2564+
If you want to get the end of the current month:
25662565
25672566
>>> ts = pd.Timestamp(2022, 1, 31)
2568-
>>> ts + pd.offsets.MonthEnd(0)
2567+
>>> pd.offsets.MonthEnd().rollforward(ts)
25692568
Timestamp('2022-01-31 00:00:00')
25702569
"""
25712570
_period_dtype_code = PeriodDtypeCode.M
@@ -2578,7 +2577,6 @@ cdef class MonthBegin(MonthOffset):
25782577
DateOffset of one month at beginning.
25792578
25802579
MonthBegin goes to the next date which is a start of the month.
2581-
To get the start of the current month pass the parameter n equals 0.
25822580
25832581
See Also
25842582
--------
@@ -2594,10 +2592,10 @@ cdef class MonthBegin(MonthOffset):
25942592
>>> ts + pd.offsets.MonthBegin()
25952593
Timestamp('2023-01-01 00:00:00')
25962594
2597-
If you want to get the start of the current month pass the parameter n equals 0:
2595+
If you want to get the start of the current month:
25982596
25992597
>>> ts = pd.Timestamp(2022, 12, 1)
2600-
>>> ts + pd.offsets.MonthBegin(0)
2598+
>>> pd.offsets.MonthBegin().rollback(ts)
26012599
Timestamp('2022-12-01 00:00:00')
26022600
"""
26032601
_prefix = "MS"
@@ -2609,7 +2607,6 @@ cdef class BusinessMonthEnd(MonthOffset):
26092607
DateOffset increments between the last business day of the month.
26102608
26112609
BusinessMonthEnd goes to the next date which is the last business day of the month.
2612-
To get the last business day of the current month pass the parameter n equals 0.
26132610
26142611
Examples
26152612
--------
@@ -2621,11 +2618,10 @@ cdef class BusinessMonthEnd(MonthOffset):
26212618
>>> ts + pd.offsets.BMonthEnd()
26222619
Timestamp('2022-12-30 00:00:00')
26232620
2624-
If you want to get the end of the current business month
2625-
pass the parameter n equals 0:
2621+
If you want to get the end of the current business month:
26262622
26272623
>>> ts = pd.Timestamp(2022, 11, 30)
2628-
>>> ts + pd.offsets.BMonthEnd(0)
2624+
>>> pd.offsets.BMonthEnd().rollforward(ts)
26292625
Timestamp('2022-11-30 00:00:00')
26302626
"""
26312627
_prefix = "BM"
@@ -2637,8 +2633,7 @@ cdef class BusinessMonthBegin(MonthOffset):
26372633
DateOffset of one month at the first business day.
26382634
26392635
BusinessMonthBegin goes to the next date which is the first business day
2640-
of the month. To get the first business day of the current month pass
2641-
the parameter n equals 0.
2636+
of the month.
26422637
26432638
Examples
26442639
--------
@@ -2650,11 +2645,10 @@ cdef class BusinessMonthBegin(MonthOffset):
26502645
>>> ts + pd.offsets.BMonthBegin()
26512646
Timestamp('2023-01-02 00:00:00')
26522647
2653-
If you want to get the start of the current business month pass
2654-
the parameter n equals 0:
2648+
If you want to get the start of the current business month:
26552649
26562650
>>> ts = pd.Timestamp(2022, 12, 1)
2657-
>>> ts + pd.offsets.BMonthBegin(0)
2651+
>>> pd.offsets.BMonthBegin().rollback(ts)
26582652
Timestamp('2022-12-01 00:00:00')
26592653
"""
26602654
_prefix = "BMS"

pandas/_testing/__init__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,23 @@
177177
np.uint32,
178178
]
179179

180+
PYTHON_DATA_TYPES = [
181+
str,
182+
int,
183+
float,
184+
complex,
185+
list,
186+
tuple,
187+
range,
188+
dict,
189+
set,
190+
frozenset,
191+
bool,
192+
bytes,
193+
bytearray,
194+
memoryview,
195+
]
196+
180197
ENDIAN = {"little": "<", "big": ">"}[byteorder]
181198

182199
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]

pandas/conftest.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@
103103

104104
def pytest_addoption(parser) -> None:
105105
parser.addoption(
106-
"--no-strict-data-files",
107-
action="store_false",
108-
help="Don't fail if a test is skipped for missing data file.",
106+
"--strict-data-files",
107+
action="store_true",
108+
help="Fail if a test is skipped for missing data file.",
109109
)
110110

111111

@@ -760,6 +760,29 @@ def index_or_series_obj(request):
760760
return _index_or_series_objs[request.param].copy(deep=True)
761761

762762

763+
_typ_objects_series = {
764+
f"{dtype.__name__}-series": Series(dtype) for dtype in tm.PYTHON_DATA_TYPES
765+
}
766+
767+
768+
_index_or_series_memory_objs = {
769+
**indices_dict,
770+
**_series,
771+
**_narrow_series,
772+
**_typ_objects_series,
773+
}
774+
775+
776+
@pytest.fixture(params=_index_or_series_memory_objs.keys())
777+
def index_or_series_memory_obj(request):
778+
"""
779+
Fixture for tests on indexes, series, series with a narrow dtype and
780+
series with empty objects type
781+
copy to avoid mutation, e.g. setting .name
782+
"""
783+
return _index_or_series_memory_objs[request.param].copy(deep=True)
784+
785+
763786
# ----------------------------------------------------------------
764787
# DataFrames
765788
# ----------------------------------------------------------------
@@ -1112,9 +1135,9 @@ def all_numeric_accumulations(request):
11121135
@pytest.fixture
11131136
def strict_data_files(pytestconfig):
11141137
"""
1115-
Returns the configuration for the test setting `--no-strict-data-files`.
1138+
Returns the configuration for the test setting `--strict-data-files`.
11161139
"""
1117-
return pytestconfig.getoption("--no-strict-data-files")
1140+
return pytestconfig.getoption("--strict-data-files")
11181141

11191142

11201143
@pytest.fixture
@@ -1134,7 +1157,7 @@ def datapath(strict_data_files: str) -> Callable[..., str]:
11341157
Raises
11351158
------
11361159
ValueError
1137-
If the path doesn't exist and the --no-strict-data-files option is not set.
1160+
If the path doesn't exist and the --strict-data-files option is set.
11381161
"""
11391162
BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
11401163

@@ -1143,7 +1166,7 @@ def deco(*args):
11431166
if not os.path.exists(path):
11441167
if strict_data_files:
11451168
raise ValueError(
1146-
f"Could not find file {path} and --no-strict-data-files is not set."
1169+
f"Could not find file {path} and --strict-data-files is set."
11471170
)
11481171
pytest.skip(f"Could not find {path}.")
11491172
return path

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2091,7 +2091,10 @@ def _dt_round(
20912091
return self._round_temporally("round", freq, ambiguous, nonexistent)
20922092

20932093
def _dt_to_pydatetime(self):
2094-
return np.array(self._pa_array.to_pylist(), dtype=object)
2094+
data = self._pa_array.to_pylist()
2095+
if self._dtype.pyarrow_dtype.unit == "ns":
2096+
data = [ts.to_pydatetime(warn=False) for ts in data]
2097+
return np.array(data, dtype=object)
20952098

20962099
def _dt_tz_localize(
20972100
self,

pandas/core/arrays/categorical.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
cast,
1414
overload,
1515
)
16-
from warnings import warn
16+
import warnings
17+
1718

1819
import numpy as np
1920

@@ -553,6 +554,13 @@ def to_list(self):
553554
"""
554555
Alias for tolist.
555556
"""
557+
# GH#51254
558+
warnings.warn(
559+
"Categorical.to_list is deprecated and will be removed in a future "
560+
"version. Use obj.tolist() instead",
561+
FutureWarning,
562+
stacklevel=find_stack_level(),
563+
)
556564
return self.tolist()
557565

558566
@classmethod
@@ -1280,7 +1288,7 @@ def map(
12801288
Index(['first', 'second', nan], dtype='object')
12811289
"""
12821290
if na_action is lib.no_default:
1283-
warn(
1291+
warnings.warn(
12841292
"The default value of 'ignore' for the `na_action` parameter in "
12851293
"pandas.Categorical.map is deprecated and will be "
12861294
"changed to 'None' in a future version. Please set na_action to the "

pandas/core/arrays/datetimelike.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -957,10 +957,17 @@ def _cmp_method(self, other, op):
957957
if not isinstance(other, type(self)):
958958
# i.e. Timedelta/Timestamp, cast to ndarray and let
959959
# compare_mismatched_resolutions handle broadcasting
960-
other_arr = np.array(other.asm8)
960+
try:
961+
# GH#52080 see if we can losslessly cast to shared unit
962+
other = other.as_unit(self.unit, round_ok=False)
963+
except ValueError:
964+
other_arr = np.array(other.asm8)
965+
return compare_mismatched_resolutions(
966+
self._ndarray, other_arr, op
967+
)
961968
else:
962969
other_arr = other._ndarray
963-
return compare_mismatched_resolutions(self._ndarray, other_arr, op)
970+
return compare_mismatched_resolutions(self._ndarray, other_arr, op)
964971

965972
other_vals = self._unbox(other)
966973
# GH#37462 comparison on i8 values is almost 2x faster than M8/m8

pandas/core/frame.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5033,7 +5033,7 @@ def align(
50335033
method: FillnaOptions | None | lib.NoDefault = lib.no_default,
50345034
limit: int | None | lib.NoDefault = lib.no_default,
50355035
fill_axis: Axis | lib.NoDefault = lib.no_default,
5036-
broadcast_axis: Axis | None = None,
5036+
broadcast_axis: Axis | None | lib.NoDefault = lib.no_default,
50375037
) -> tuple[Self, NDFrameT]:
50385038
return super().align(
50395039
other,
@@ -11418,7 +11418,7 @@ def asfreq(
1141811418
def resample(
1141911419
self,
1142011420
rule,
11421-
axis: Axis = 0,
11421+
axis: Axis | lib.NoDefault = lib.no_default,
1142211422
closed: str | None = None,
1142311423
label: str | None = None,
1142411424
convention: str = "start",

0 commit comments

Comments
 (0)