Skip to content

Commit 9ca0a98

Browse files
committed
Merge remote-tracking branch 'upstream/master' into GH38454-export-stata-value-labels
2 parents 2331741 + b8d750f commit 9ca0a98

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+936
-604
lines changed

asv_bench/benchmarks/reshape.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def setup(self, dtype):
102102
columns = np.arange(n)
103103
if dtype == "int":
104104
values = np.arange(m * m * n).reshape(m * m, n)
105+
self.df = DataFrame(values, index, columns)
105106
else:
106107
# the category branch is ~20x slower than int. So we
107108
# cut down the size a bit. Now it's only ~3x slower.
@@ -111,7 +112,10 @@ def setup(self, dtype):
111112
values = np.take(list(string.ascii_letters), indices)
112113
values = [pd.Categorical(v) for v in values.T]
113114

114-
self.df = DataFrame(values, index, columns)
115+
self.df = DataFrame(
116+
{i: cat for i, cat in enumerate(values)}, index, columns
117+
)
118+
115119
self.df2 = self.df.iloc[:-1]
116120

117121
def time_full_product(self, dtype):

ci/code_checks.sh

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -107,45 +107,22 @@ fi
107107
### DOCTESTS ###
108108
if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
109109

110-
MSG='Doctests for individual files' ; echo $MSG
111-
pytest -q --doctest-modules \
112-
pandas/core/accessor.py \
113-
pandas/core/aggregation.py \
114-
pandas/core/algorithms.py \
115-
pandas/core/base.py \
116-
pandas/core/construction.py \
117-
pandas/core/frame.py \
118-
pandas/core/generic.py \
119-
pandas/core/indexers.py \
120-
pandas/core/nanops.py \
121-
pandas/core/series.py \
122-
pandas/io/sql.py
123-
RET=$(($RET + $?)) ; echo $MSG "DONE"
124-
125-
MSG='Doctests for directories' ; echo $MSG
126-
pytest -q --doctest-modules \
110+
MSG='Doctests' ; echo $MSG
111+
python -m pytest --doctest-modules \
127112
pandas/_libs/ \
128113
pandas/api/ \
129114
pandas/arrays/ \
130115
pandas/compat/ \
131-
pandas/core/array_algos/ \
132-
pandas/core/arrays/ \
133-
pandas/core/computation/ \
134-
pandas/core/dtypes/ \
135-
pandas/core/groupby/ \
136-
pandas/core/indexes/ \
137-
pandas/core/ops/ \
138-
pandas/core/reshape/ \
139-
pandas/core/strings/ \
140-
pandas/core/tools/ \
141-
pandas/core/window/ \
116+
pandas/core \
142117
pandas/errors/ \
143118
pandas/io/clipboard/ \
144119
pandas/io/json/ \
145120
pandas/io/excel/ \
146121
pandas/io/parsers/ \
147122
pandas/io/sas/ \
148-
pandas/tseries/
123+
pandas/io/sql.py \
124+
pandas/tseries/ \
125+
pandas/io/formats/style_render.py
149126
RET=$(($RET + $?)) ; echo $MSG "DONE"
150127

151128
fi

ci/deps/actions-39-slow.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dependencies:
2323
- matplotlib
2424
- moto>=1.3.14
2525
- flask
26+
- numba
2627
- numexpr
2728
- numpy
2829
- openpyxl

ci/deps/actions-39.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ dependencies:
2222
- matplotlib
2323
- moto>=1.3.14
2424
- flask
25+
- numba
2526
- numexpr
2627
- numpy
2728
- openpyxl

ci/deps/azure-windows-39.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dependencies:
2323
- matplotlib
2424
- moto>=1.3.14
2525
- flask
26+
- numba
2627
- numexpr
2728
- numpy
2829
- openpyxl

ci/run_tests.sh

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ fi
3030
echo $PYTEST_CMD
3131
sh -c "$PYTEST_CMD"
3232

33-
PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas"
33+
if [[ "$PANDAS_DATA_MANAGER" != "array" ]]; then
34+
# The ArrayManager tests should have already been run by PYTEST_CMD if PANDAS_DATA_MANAGER was already set to array
35+
PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas"
3436

35-
echo $PYTEST_AM_CMD
36-
sh -c "$PYTEST_AM_CMD"
37+
echo $PYTEST_AM_CMD
38+
sh -c "$PYTEST_AM_CMD"
39+
fi

doc/source/user_guide/boolean.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
Nullable Boolean data type
1313
**************************
1414

15+
.. note::
16+
17+
BooleanArray is currently experimental. Its API or implementation may
18+
change without warning.
19+
1520
.. versionadded:: 1.0.0
1621

1722

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.3
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.3.2
2728
v1.3.1
2829
v1.3.0
2930

doc/source/whatsnew/v1.3.1.rst

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.. _whatsnew_131:
22

3-
What's new in 1.3.1 (July ??, 2021)
3+
What's new in 1.3.1 (July 25, 2021)
44
-----------------------------------
55

66
These are the changes in pandas 1.3.1. See :ref:`release` for a full changelog
@@ -25,6 +25,10 @@ Fixed regressions
2525
- Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`)
2626
- Regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`)
2727
- Bug in :class:`Series` constructor not accepting a ``dask.Array`` (:issue:`38645`)
28+
- Fixed regression for ``SettingWithCopyWarning`` displaying an incorrect stacklevel (:issue:`42570`)
29+
- Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`)
30+
- Fixed regression in :func:`to_datetime` returning ``pd.NaT`` for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)
31+
- Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`)
2832

2933
.. ---------------------------------------------------------------------------
3034
@@ -34,16 +38,8 @@ Bug fixes
3438
~~~~~~~~~
3539
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
3640
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
37-
-
38-
39-
.. ---------------------------------------------------------------------------
40-
41-
.. _whatsnew_131.other:
42-
43-
Other
44-
~~~~~
45-
-
46-
-
41+
- Fixed bug in :meth:`.Styler.set_sticky` not handling index names correctly in the single-index columns case (:issue:`42537`)
42+
- Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`)
4743

4844
.. ---------------------------------------------------------------------------
4945
@@ -52,4 +48,4 @@ Other
5248
Contributors
5349
~~~~~~~~~~~~
5450

55-
.. contributors:: v1.3.0..v1.3.1|HEAD
51+
.. contributors:: v1.3.0..v1.3.1

doc/source/whatsnew/v1.3.2.rst

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
.. _whatsnew_132:
2+
3+
What's new in 1.3.2 (August ??, 2021)
4+
-------------------------------------
5+
6+
These are the changes in pandas 1.3.2. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_132.regressions:
14+
15+
Fixed regressions
16+
~~~~~~~~~~~~~~~~~
17+
- Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`)
18+
- Regression in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
19+
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
20+
-
21+
22+
.. ---------------------------------------------------------------------------
23+
24+
.. _whatsnew_132.bug_fixes:
25+
26+
Bug fixes
27+
~~~~~~~~~
28+
-
29+
-
30+
31+
.. ---------------------------------------------------------------------------
32+
33+
.. _whatsnew_132.other:
34+
35+
Other
36+
~~~~~
37+
-
38+
-
39+
40+
.. ---------------------------------------------------------------------------
41+
42+
.. _whatsnew_132.contributors:
43+
44+
Contributors
45+
~~~~~~~~~~~~
46+
47+
.. contributors:: v1.3.1..v1.3.2|HEAD

doc/source/whatsnew/v1.4.0.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ enhancement2
2929

3030
Other enhancements
3131
^^^^^^^^^^^^^^^^^^
32+
- :class:`DataFrameGroupBy` operations with ``as_index=False`` now correctly retain ``ExtensionDtype`` dtypes for columns being grouped on (:issue:`41373`)
3233
- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
3334
- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
3435
- Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
@@ -156,6 +157,7 @@ Deprecations
156157
- Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, an :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with a leading :class:`Float64Index` level not containing the key (:issue:`33469`)
157158
- Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`)
158159
- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
160+
- Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
159161

160162
.. ---------------------------------------------------------------------------
161163
@@ -165,6 +167,7 @@ Performance improvements
165167
~~~~~~~~~~~~~~~~~~~~~~~~
166168
- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
167169
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
170+
- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)
168171

169172
.. ---------------------------------------------------------------------------
170173
@@ -184,7 +187,6 @@ Categorical
184187

185188
Datetimelike
186189
^^^^^^^^^^^^
187-
- Bug in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)
188190
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
189191
-
190192

@@ -225,7 +227,6 @@ Indexing
225227
- Bug in :meth:`Series.loc` with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
226228
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
227229
- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
228-
-
229230

230231
Missing
231232
^^^^^^^
@@ -236,6 +237,7 @@ MultiIndex
236237
^^^^^^^^^^
237238
- Bug in :meth:`MultiIndex.get_loc` where the first level is a :class:`DatetimeIndex` and a string key is passed (:issue:`42465`)
238239
- Bug in :meth:`MultiIndex.reindex` when passing a ``level`` that corresponds to an ``ExtensionDtype`` level (:issue:`42043`)
240+
- Bug in :meth:`MultiIndex.get_loc` raising ``TypeError`` instead of ``KeyError`` on nested tuple (:issue:`42440`)
239241
-
240242

241243
I/O
@@ -263,7 +265,7 @@ Groupby/resample/rolling
263265

264266
Reshaping
265267
^^^^^^^^^
266-
-
268+
- Bug in :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
267269
-
268270

269271
Sparse

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ dependencies:
108108
- fsspec>=0.7.4, <2021.6.0 # for generic remote file operations
109109
- gcsfs>=0.6.0 # file IO when using 'gcs://...' path
110110
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
111-
- xarray # DataFrame.to_xarray
111+
- xarray<0.19 # DataFrame.to_xarray
112112
- cftime # Needed for downstream xarray.CFTimeIndex test
113113
- pyreadstat # pandas.read_spss
114114
- tabulate>=0.8.3 # DataFrame.to_markdown

pandas/core/arrays/masked.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override]
417417
# see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
418418
result[self._mask] = values_have_NA
419419

420-
mask = np.zeros_like(self, dtype=bool)
420+
mask = np.zeros(self._data.shape, dtype=bool)
421421
return BooleanArray(result, mask, copy=False)
422422

423423
def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:

pandas/core/flags.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,9 @@ def allows_duplicate_labels(self) -> bool:
6868
Examples
6969
--------
7070
>>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a'])
71-
>>> df.allows_duplicate_labels
71+
>>> df.flags.allows_duplicate_labels
7272
True
73-
>>> df.allows_duplicate_labels = False
73+
>>> df.flags.allows_duplicate_labels = False
7474
Traceback (most recent call last):
7575
...
7676
pandas.errors.DuplicateLabelError: Index has duplicates.

pandas/core/generic.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
doc,
6868
rewrite_axis_style_signature,
6969
)
70+
from pandas.util._exceptions import find_stack_level
7071
from pandas.util._validators import (
7172
validate_ascending,
7273
validate_bool_kwarg,
@@ -3506,7 +3507,7 @@ def _maybe_update_cacher(
35063507
"""
35073508

35083509
if verify_is_copy:
3509-
self._check_setitem_copy(stacklevel=5, t="referent")
3510+
self._check_setitem_copy(t="referent")
35103511

35113512
if clear:
35123513
self._clear_item_cache()
@@ -3853,26 +3854,21 @@ def _check_is_chained_assignment_possible(self) -> bool_t:
38533854
setting.
38543855
"""
38553856
if self._is_copy:
3856-
self._check_setitem_copy(stacklevel=4, t="referent")
3857+
self._check_setitem_copy(t="referent")
38573858
return False
38583859

38593860
@final
3860-
def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
3861+
def _check_setitem_copy(self, t="setting", force=False):
38613862
"""
38623863
38633864
Parameters
38643865
----------
3865-
stacklevel : int, default 4
3866-
the level to show of the stack when the error is output
38673866
t : str, the type of setting error
38683867
force : bool, default False
38693868
If True, then force showing an error.
38703869
38713870
validate if we are doing a setitem on a chained copy.
38723871
3873-
If you call this function, be sure to set the stacklevel such that the
3874-
user will see the error *at the level of setting*
3875-
38763872
It is technically possible to figure out that we are setting on
38773873
a copy even WITH a multi-dtyped pandas object. In other words, some
38783874
blocks may be views while others are not. Currently _is_view will ALWAYS
@@ -3931,7 +3927,7 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
39313927
if value == "raise":
39323928
raise com.SettingWithCopyError(t)
39333929
elif value == "warn":
3934-
warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel)
3930+
warnings.warn(t, com.SettingWithCopyWarning, stacklevel=find_stack_level())
39353931

39363932
def __delitem__(self, key) -> None:
39373933
"""

pandas/core/groupby/generic.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ def apply_series_value_counts():
758758
# new values are where sorted labels change
759759
lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
760760
inc = np.r_[True, lchanges]
761-
if not len(lchanges):
761+
if not len(val):
762762
inc = lchanges
763763
inc[idx] = True # group boundaries are also new values
764764
out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts
@@ -1033,7 +1033,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
10331033
self._insert_inaxis_grouper_inplace(result)
10341034
result.index = Index(range(len(result)))
10351035

1036-
return result._convert(datetime=True)
1036+
return result
10371037

10381038
agg = aggregate
10391039

@@ -1684,6 +1684,8 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
16841684
if self.axis == 1:
16851685
result = result.T
16861686

1687+
# Note: we only need to pass datetime=True in order to get numeric
1688+
# values converted
16871689
return self._reindex_output(result)._convert(datetime=True)
16881690

16891691
def _iterate_column_groupbys(self, obj: FrameOrSeries):

0 commit comments

Comments
 (0)