pandas-dev
diff --git a/‎Dockerfile
Lines changed: 6 additions & 1 deletion b/‎Dockerfile
Lines changed: 6 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/indexing.py
Lines changed: 34 additions & 16 deletions b/‎asv_bench/benchmarks/indexing.py
Lines changed: 34 additions & 16 deletions
diff --git a/‎asv_bench/benchmarks/reindex.py
Lines changed: 7 additions & 1 deletion b/‎asv_bench/benchmarks/reindex.py
Lines changed: 7 additions & 1 deletion
diff --git a/‎doc/source/_static/style/format_excel_css.png
32.4 KB b/‎doc/source/_static/style/format_excel_css.png
32.4 KB
diff --git a/‎doc/source/whatsnew/v1.4.2.rst
Lines changed: 2 additions & 0 deletions b/‎doc/source/whatsnew/v1.4.2.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v1.5.0.rst
Lines changed: 9 additions & 2 deletions b/‎doc/source/whatsnew/v1.5.0.rst
Lines changed: 9 additions & 2 deletions
@@ -1,4 +1,4 @@
-FROM quay.io/condaforge/miniforge3:4.11.0-0
+FROM quay.io/condaforge/miniforge3
 
 # if you forked pandas, you can pass in your own GitHub username to use your fork
 # i.e. gh_username=myname
@@ -12,6 +12,11 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update \
     && apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
     #
+    # Install tzdata and configure timezone (fix for tests which try to read from "/etc/localtime")
+    && apt-get -y install tzdata \
+    && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
+    && dpkg-reconfigure -f noninteractive tzdata \
+    #
     # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
     && apt-get -y install git iproute2 procps iproute2 lsb-release \
     #
 
@@ -204,11 +204,11 @@ class MultiIndexing:
     param_names = ["unique_levels"]
 
     def setup(self, unique_levels):
-        self.ndim = 2
+        self.nlevels = 2
         if unique_levels:
-            mi = MultiIndex.from_arrays([range(1000000)] * self.ndim)
+            mi = MultiIndex.from_arrays([range(1000000)] * self.nlevels)
         else:
-            mi = MultiIndex.from_product([range(1000)] * self.ndim)
+            mi = MultiIndex.from_product([range(1000)] * self.nlevels)
         self.df = DataFrame(np.random.randn(len(mi)), index=mi)
 
         self.tgt_slice = slice(200, 800)
@@ -232,27 +232,27 @@ def time_loc_partial_key_list(self, unique_levels):
     def time_loc_partial_key_scalar(self, unique_levels):
         self.df.loc[self.tgt_scalar, :]
 
-    def time_loc_partial_bool_indexer(self, unique_levels):
+    def time_loc_partial_key_bool_indexer(self, unique_levels):
         self.df.loc[self.tgt_bool_indexer, :]
 
     def time_loc_all_slices(self, unique_levels):
-        target = tuple([self.tgt_slice] * self.ndim)
+        target = tuple([self.tgt_slice] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_null_slices(self, unique_levels):
-        target = tuple([self.tgt_null_slice] * self.ndim)
+        target = tuple([self.tgt_null_slice] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_lists(self, unique_levels):
-        target = tuple([self.tgt_list] * self.ndim)
+        target = tuple([self.tgt_list] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_scalars(self, unique_levels):
-        target = tuple([self.tgt_scalar] * self.ndim)
+        target = tuple([self.tgt_scalar] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_bool_indexers(self, unique_levels):
-        target = tuple([self.tgt_bool_indexer] * self.ndim)
+        target = tuple([self.tgt_bool_indexer] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_slice_plus_null_slice(self, unique_levels):
@@ -263,6 +263,18 @@ def time_loc_null_slice_plus_slice(self, unique_levels):
         target = (self.tgt_null_slice, self.tgt_slice)
         self.df.loc[target, :]
 
+    def time_xs_level_0(self, unique_levels):
+        target = self.tgt_scalar
+        self.df.xs(target, level=0)
+
+    def time_xs_level_1(self, unique_levels):
+        target = self.tgt_scalar
+        self.df.xs(target, level=1)
+
+    def time_xs_full_key(self, unique_levels):
+        target = tuple([self.tgt_scalar] * self.nlevels)
+        self.df.xs(target)
+
 
 class IntervalIndexing:
     def setup_cache(self):
@@ -290,20 +302,26 @@ def setup(self):
         self.dti = dti
         self.dti2 = dti2
 
-        index = np.random.choice(dti, 10000, replace=True)
-        df = DataFrame(index=index, data={"a": 1})
-        df_sort = df.sort_index()
-        self.df = df
-        self.df_sort = df_sort
-
     def time_get_indexer_mismatched_tz(self):
         # reached via e.g.
         #  ser = Series(range(len(dti)), index=dti)
         #  ser[dti2]
         self.dti.get_indexer(self.dti2)
 
+
+class SortedAndUnsortedDatetimeIndexLoc:
+    def setup(self):
+        dti = date_range("2016-01-01", periods=10000, tz="US/Pacific")
+        index = np.array(dti)
+
+        unsorted_index = index.copy()
+        unsorted_index[10] = unsorted_index[20]
+
+        self.df_unsorted = DataFrame(index=unsorted_index, data={"a": 1})
+        self.df_sort = DataFrame(index=index, data={"a": 1})
+
     def time_loc_unsorted(self):
-        self.df.loc["2016-6-11"]
+        self.df_unsorted.loc["2016-6-11"]
 
     def time_loc_sorted(self):
         self.df_sort.loc["2016-6-11"]
 
@@ -28,16 +28,22 @@ def setup(self):
         index = MultiIndex.from_arrays([level1, level2])
         self.s = Series(np.random.randn(N * K), index=index)
         self.s_subset = self.s[::2]
+        self.s_subset_no_cache = self.s[::2].copy()
 
     def time_reindex_dates(self):
         self.df.reindex(self.rng_subset)
 
     def time_reindex_columns(self):
         self.df2.reindex(columns=self.df.columns[1:5])
 
-    def time_reindex_multiindex(self):
+    def time_reindex_multiindex_with_cache(self):
+        # MultiIndex._values gets cached
         self.s.reindex(self.s_subset.index)
 
+    def time_reindex_multiindex_no_cache(self):
+        # Copy to avoid MultiIndex._values getting cached
+        self.s.reindex(self.s_subset_no_cache.index.copy())
+
 
 class ReindexMethod:
 
 
@@ -15,6 +15,8 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`)
+- Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
+- Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -289,6 +289,7 @@ Other Deprecations
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement in :meth:`DataFrame.corrwith` for column-wise (axis=0) Pearson and Spearman correlation when other is a :class:`Series` (:issue:`46174`)
 - Performance improvement in :meth:`.GroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`)
 - Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`)
 - Performance improvement in :meth:`.GroupBy.diff` (:issue:`16706`)
@@ -297,9 +298,9 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.get_locs` (:issue:`45681`, :issue:`46040`)
 - Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`)
 - Performance improvement in :meth:`DataFrame.join` when left and/or right are empty (:issue:`46015`)
+- Performance improvement in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when target is a :class:`MultiIndex` (:issue:`46235`)
 - Performance improvement in :func:`factorize` (:issue:`46109`)
 - Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.bug_fixes:
@@ -392,6 +393,7 @@ Missing
 
 MultiIndex
 ^^^^^^^^^^
+- Bug in :meth:`Series.to_numpy` where multiindexed Series could not be converted to numpy arrays when an ``na_value`` was supplied (:issue:`45774`)
 - Bug in :class:`MultiIndex.equals` not commutative when only one side has extension array dtype (:issue:`46026`)
 -
 
@@ -406,6 +408,7 @@ I/O
 - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
 - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
 - Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
+- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
 
 Period
 ^^^^^^
@@ -418,7 +421,7 @@ Plotting
 - Bug in :meth:`DataFrame.plot.box` that prevented labeling the x-axis (:issue:`45463`)
 - Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`)
 - Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`)
--
+- Bug in :meth:`DataFrame.plot.scatter` that prevented specifying ``norm`` (:issue:`45809`)
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -428,6 +431,10 @@ Groupby/resample/rolling
 - Bug in :meth:`.ExponentialMovingWindow.mean` with ``axis=1`` and ``engine='numba'`` when the :class:`DataFrame` has more columns than rows (:issue:`46086`)
 - Bug when using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`)
 - Bug in :meth:`.DataFrameGroupby.transform` fails when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`)
+- Bug in :meth:`.DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`)
+- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
+- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
+-
 
 Reshaping
 ^^^^^^^^^
Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,8 @@ including other versions of pandas.`
`15`	`15`	`Fixed regressions`
`16`	`16`	`~~~~~~~~~~~~~~~~~`
`17`	`17`	- Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`)
	`18`	+- Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
	`19`	+- Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`)
`18`	`20`	`-`
`19`	`21`
`20`	`22`	`.. ---------------------------------------------------------------------------`