pandas-dev
diff --git a/‎asv_bench/benchmarks/io/sql.py
Lines changed: 4 additions & 4 deletions b/‎asv_bench/benchmarks/io/sql.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎asv_bench/benchmarks/reshape.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/reshape.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/scripts/eval_performance.py
Lines changed: 108 additions & 0 deletions b/‎doc/scripts/eval_performance.py
Lines changed: 108 additions & 0 deletions
diff --git a/‎doc/source/_static/eval-perf-small.png
-24.7 KB b/‎doc/source/_static/eval-perf-small.png
-24.7 KB
diff --git a/‎doc/source/_static/eval-perf.png
10.8 KB b/‎doc/source/_static/eval-perf.png
10.8 KB
diff --git a/‎doc/source/_static/query-perf-small.png
-21.2 KB b/‎doc/source/_static/query-perf-small.png
-21.2 KB
diff --git a/‎doc/source/_static/query-perf.png
8.79 KB b/‎doc/source/_static/query-perf.png
8.79 KB
diff --git a/‎doc/source/conf.py
Lines changed: 1 addition & 1 deletion b/‎doc/source/conf.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/development/contributing_environment.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/development/contributing_environment.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/user_guide/categorical.rst
Lines changed: 2 additions & 12 deletions b/‎doc/source/user_guide/categorical.rst
Lines changed: 2 additions & 12 deletions
diff --git a/‎doc/source/user_guide/enhancingperf.rst
Lines changed: 5 additions & 19 deletions b/‎doc/source/user_guide/enhancingperf.rst
Lines changed: 5 additions & 19 deletions
diff --git a/‎doc/source/user_guide/indexing.rst
Lines changed: 19 additions & 5 deletions b/‎doc/source/user_guide/indexing.rst
Lines changed: 19 additions & 5 deletions
diff --git a/‎doc/source/whatsnew/v0.15.0.rst
Lines changed: 1 addition & 2 deletions b/‎doc/source/whatsnew/v0.15.0.rst
Lines changed: 1 addition & 2 deletions
diff --git a/‎doc/source/whatsnew/v0.19.0.rst
Lines changed: 2 additions & 2 deletions b/‎doc/source/whatsnew/v0.19.0.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/source/whatsnew/v2.0.0.rst
Lines changed: 14 additions & 0 deletions b/‎doc/source/whatsnew/v2.0.0.rst
Lines changed: 14 additions & 0 deletions
diff --git a/‎pandas/_libs/parsers.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/parsers.pyx
Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ def setup(self, connection):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -88,7 +88,7 @@ def setup(self, connection, dtype):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -117,7 +117,7 @@ def setup(self):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -164,7 +164,7 @@ def setup(self, dtype):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
 
@@ -36,7 +36,7 @@ def setup(self):
         self.df = DataFrame(data)
 
     def time_reshape_pivot_time_series(self):
-        self.df.pivot("date", "variable", "value")
+        self.df.pivot(index="date", columns="variable", values="value")
 
 
 class SimpleReshape:
 
@@ -0,0 +1,108 @@
+from timeit import repeat as timeit
+
+import numpy as np
+import seaborn as sns
+
+from pandas import DataFrame
+
+# Setup snippet handed to the timing calls as ``setup=``.  The ``%d``
+# placeholder is filled with the number of rows and ``%s`` with the statement
+# that defines the expression string ``s``.
+setup_common = """from pandas import DataFrame
+from numpy.random import randn
+df = DataFrame(randn(%d, 3), columns=list('abc'))
+%s"""
+
+# Defines the arithmetic expression ``s`` evaluated with ``df.eval``.
+setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"
+
+
+def bench_with(n, times=10, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                "df.eval(s, engine=%r)" % engine,
+                setup=setup_common % (n, setup_with),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+# Defines the boolean expression ``s`` evaluated with ``df.query``.
+setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'"
+
+
+def bench_subset(n, times=20, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                "df.query(s, engine=%r)" % engine,
+                setup=setup_common % (n, setup_subset),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+def bench(mn=3, mx=7, num=100, engines=("python", "numexpr"), verbose=False):
+    r = np.logspace(mn, mx, num=num).round().astype(int)
+
+    ev = DataFrame(np.empty((num, len(engines))), columns=engines)
+    qu = ev.copy(deep=True)
+
+    ev["size"] = qu["size"] = r
+
+    for engine in engines:
+        for i, n in enumerate(r):
+            if verbose & (i % 10 == 0):
+                print("engine: %r, i == %d" % (engine, i))
+            ev_times = bench_with(n, times=1, repeat=1, engine=engine)
+            ev.loc[i, engine] = np.mean(ev_times)
+            qu_times = bench_subset(n, times=1, repeat=1, engine=engine)
+            qu.loc[i, engine] = np.mean(qu_times)
+
+    return ev, qu
+
+
+def plot_perf(df, engines, title, filename=None):
+    from matplotlib.pyplot import figure
+
+    sns.set()
+    sns.set_palette("Set2")
+
+    fig = figure(figsize=(4, 3), dpi=120)
+    ax = fig.add_subplot(111)
+
+    for engine in engines:
+        ax.loglog(df["size"], df[engine], label=engine, lw=2)
+
+    ax.set_xlabel("Number of Rows")
+    ax.set_ylabel("Time (s)")
+    ax.set_title(title)
+    ax.legend(loc="best")
+    ax.tick_params(top=False, right=False)
+
+    fig.tight_layout()
+
+    if filename is not None:
+        fig.savefig(filename)
+
+
+if __name__ == "__main__":
+    import os
+
+    pandas_dir = os.path.dirname(
+        os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+    )
+    static_path = os.path.join(pandas_dir, "doc", "source", "_static")
+
+    join = lambda p: os.path.join(static_path, p)
+
+    fn = join("eval-query-perf-data.h5")
+
+    engines = "python", "numexpr"
+
+    ev, qu = bench(verbose=True)  # only this one
+
+    plot_perf(ev, engines, "DataFrame.eval()", filename=join("eval-perf.png"))
+    plot_perf(qu, engines, "DataFrame.query()", filename=join("query-perf.png"))
@@ -236,7 +236,7 @@
 if ".dev" in version:
     switcher_version = "dev"
 elif "rc" in version:
-    switcher_version = version.split("rc")[0] + " (rc)"
+    switcher_version = version.split("rc", maxsplit=1)[0] + " (rc)"
 
 html_theme_options = {
     "external_links": [],
 
@@ -10,7 +10,7 @@ To test out code changes, you'll need to build pandas from source, which
 requires a C/C++ compiler and Python environment. If you're making documentation
 changes, you can skip to :ref:`contributing to the documentation <contributing_documentation>` but if you skip
 creating the development environment you won't be able to build the documentation
-locally before pushing your changes.
+locally before pushing your changes. It's recommended to also install the :ref:`pre-commit hooks <contributing.pre-commit>`.
 
 .. contents:: Table of contents:
    :local:
 
@@ -353,11 +353,6 @@ Renaming categories is done by using the
 
     In contrast to R's ``factor``, categorical data can have categories of other types than string.
 
-.. note::
-
-    Be aware that assigning new categories is an inplace operation, while most other operations
-    under ``Series.cat`` per default return a new ``Series`` of dtype ``category``.
-
 Categories must be unique or a ``ValueError`` is raised:
 
 .. ipython:: python
@@ -952,7 +947,6 @@ categorical (categories and ordering). So if you read back the CSV file you have
 relevant columns back to ``category`` and assign the right categories and categories ordering.
 
 .. ipython:: python
-    :okwarning:
 
     import io
 
@@ -969,8 +963,8 @@ relevant columns back to ``category`` and assign the right categories and catego
     df2["cats"]
     # Redo the category
     df2["cats"] = df2["cats"].astype("category")
-    df2["cats"].cat.set_categories(
-        ["very bad", "bad", "medium", "good", "very good"], inplace=True
+    df2["cats"] = df2["cats"].cat.set_categories(
+        ["very bad", "bad", "medium", "good", "very good"]
     )
     df2.dtypes
     df2["cats"]
@@ -1162,16 +1156,12 @@ Constructing a ``Series`` from a ``Categorical`` will not copy the input
 change the original ``Categorical``:
 
 .. ipython:: python
-    :okwarning:
 
     cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
     s = pd.Series(cat, name="cat")
     cat
     s.iloc[0:2] = 10
     cat
-    df = pd.DataFrame(s)
-    df["cat"].cat.categories = [1, 2, 3, 4, 5]
-    cat
 
 Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``:
 
 
@@ -690,21 +690,12 @@ The equivalent in standard Python would be
    df["a"] = 1
    df
 
-The :class:`DataFrame.query` method has a ``inplace`` keyword which determines
-whether the query modifies the original frame.
-
-.. ipython:: python
-
-   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-   df.query("a > 2")
-   df.query("a > 2", inplace=True)
-   df
-
 Local variables
 ~~~~~~~~~~~~~~~
 
 You must *explicitly reference* any local variable that you want to use in an
-expression by placing the ``@`` character in front of the name. For example,
+expression by placing the ``@`` character in front of the name. This mechanism is
+the same for both :meth:`DataFrame.query` and :meth:`DataFrame.eval`. For example,
 
 .. ipython:: python
 
@@ -820,17 +811,12 @@ significant performance benefit.  Here is a plot showing the running time of
 :func:`pandas.eval` as function of the size of the frame involved in the
 computation. The two lines are two different engines.
 
+..
+    The eval-perf.png figure below was generated with /doc/scripts/eval_performance.py
 
 .. image:: ../_static/eval-perf.png
 
-
-.. note::
-
-   Operations with smallish objects (around 15k-20k rows) are faster using
-   plain Python:
-
-       .. image:: ../_static/eval-perf-small.png
-
+You will only see the performance benefits of using the ``numexpr`` engine with :func:`pandas.eval` if your frame has more than approximately 100,000 rows.
 
 This plot was created using a :class:`DataFrame` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
 
@@ -1240,6 +1240,17 @@ If instead you don't want to or cannot name your index, you can use the name
    renaming your columns to something less ambiguous.
 
 
+The :meth:`DataFrame.query` method has an ``inplace`` keyword which determines
+whether the query modifies the original frame.
+
+.. ipython:: python
+
+   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
+   df.query("a > 2")
+   df.query("a > 2", inplace=True)
+   df
+
+
 :class:`~pandas.MultiIndex` :meth:`~pandas.DataFrame.query` Syntax
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1438,15 +1449,18 @@ Performance of :meth:`~pandas.DataFrame.query`
 ``DataFrame.query()`` using ``numexpr`` is slightly faster than Python for
 large frames.
 
+..
+    The query-perf.png figure below was generated with /doc/scripts/eval_performance.py
+
 .. image:: ../_static/query-perf.png
 
-.. note::
 
-   You will only see the performance benefits of using the ``numexpr`` engine
-   with ``DataFrame.query()`` if your frame has more than approximately 200,000
-   rows.
 
-      .. image:: ../_static/query-perf-small.png
+You will only see the performance benefits of using the ``numexpr`` engine
+with ``DataFrame.query()`` if your frame has more than approximately 100,000
+rows.
+
+
 
 This plot was created using a ``DataFrame`` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
 
@@ -70,7 +70,6 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
 :ref:`API documentation <api.arrays.categorical>`.
 
 .. ipython:: python
-    :okwarning:
 
     df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6],
                        "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
@@ -79,7 +78,7 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
     df["grade"]
 
     # Rename the categories
-    df["grade"].cat.categories = ["very good", "good", "very bad"]
+    df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])
 
     # Reorder the categories and simultaneously add the missing categories
     df["grade"] = df["grade"].cat.set_categories(["very bad", "bad",
 
@@ -271,12 +271,12 @@ Individual columns can be parsed as a ``Categorical`` using a dict specification
    such as :func:`to_datetime`.
 
    .. ipython:: python
-      :okwarning:
 
       df = pd.read_csv(StringIO(data), dtype="category")
       df.dtypes
       df["col3"]
-      df["col3"].cat.categories = pd.to_numeric(df["col3"].cat.categories)
+      new_categories = pd.to_numeric(df["col3"].cat.categories)
+      df["col3"] = df["col3"].cat.rename_categories(new_categories)
       df["col3"]
 
 .. _whatsnew_0190.enhancements.union_categoricals:
 
@@ -229,6 +229,13 @@ Removal of prior version deprecations/changes
 - Disallow passing non-round floats to :class:`Timestamp` with ``unit="M"`` or ``unit="Y"`` (:issue:`47266`)
 - Remove keywords ``convert_float`` and ``mangle_dupe_cols`` from :func:`read_excel` (:issue:`41176`)
 - Disallow passing non-keyword arguments to :func:`read_excel` except ``io`` and ``sheet_name`` (:issue:`34418`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.set_index` except ``keys`` (:issue:`41495`)
+- Disallow passing non-keyword arguments to :meth:`Resampler.interpolate` except ``method`` (:issue:`41699`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` except ``level`` (:issue:`41496`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.dropna` and :meth:`Series.dropna` (:issue:`41504`)
+- Disallow passing non-keyword arguments to :meth:`ExtensionArray.argsort` (:issue:`46134`)
+- Disallow passing non-keyword arguments to :meth:`Categorical.sort_values` (:issue:`47618`)
+- Disallow passing non-keyword arguments to :meth:`Index.drop_duplicates` and :meth:`Series.drop_duplicates` (:issue:`41485`)
 - Disallow passing non-keyword arguments to :meth:`DataFrame.drop_duplicates` except for ``subset`` (:issue:`41485`)
 - Disallow passing non-keyword arguments to :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` (:issue:`41506`)
 - Disallow passing non-keyword arguments to :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` except for ``method`` (:issue:`41510`)
@@ -241,6 +248,9 @@ Removal of prior version deprecations/changes
 - Disallow passing non-keyword arguments to :func:`read_json` except for ``path_or_buf`` (:issue:`27573`)
 - Disallow passing non-keyword arguments to :func:`read_sas` except for ``filepath_or_buffer`` (:issue:`47154`)
 - Disallow passing non-keyword arguments to :func:`read_stata` except for ``filepath_or_buffer`` (:issue:`48128`)
+- Disallow passing non-keyword arguments to :func:`read_csv` except ``filepath_or_buffer`` (:issue:`41485`)
+- Disallow passing non-keyword arguments to :func:`read_table` except ``filepath_or_buffer`` (:issue:`41485`)
+- Disallow passing non-keyword arguments to :func:`read_fwf` except ``filepath_or_buffer`` (:issue:`44710`)
 - Disallow passing non-keyword arguments to :func:`read_xml` except for ``path_or_buffer`` (:issue:`45133`)
 - Disallow passing non-keyword arguments to :meth:`Series.mask` and :meth:`DataFrame.mask` except ``cond`` and ``other`` (:issue:`41580`)
 - Disallow passing non-keyword arguments to :meth:`DataFrame.to_stata` except for ``path`` (:issue:`48128`)
@@ -275,8 +285,10 @@ Removal of prior version deprecations/changes
 - Removed :meth:`Series.str.__iter__` (:issue:`28277`)
 - Removed ``pandas.SparseArray`` in favor of :class:`arrays.SparseArray` (:issue:`30642`)
 - Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame``, including pickle support. (:issue:`30642`)
+- Enforced disallowing passing an integer ``fill_value`` to :meth:`DataFrame.shift` and :meth:`Series.shift` with datetime64, timedelta64, or period dtypes (:issue:`32591`)
 - Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
 - Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
+- Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
 - Removed setting Categorical._codes directly (:issue:`41429`)
 - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`)
 - Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`)
@@ -290,6 +302,8 @@ Removal of prior version deprecations/changes
 - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
 - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
 - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
+- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_200.performance:
 
@@ -74,7 +74,7 @@ from pandas._libs.util cimport (
     UINT64_MAX,
 )
 
-import pandas._libs.lib as lib
+from pandas._libs import lib
 
 from pandas._libs.khash cimport (
     kh_destroy_float64,
Original file line number	Diff line number	Diff line change
`@@ -74,7 +74,7 @@ from pandas._libs.util cimport (`
`74`	`74`	`UINT64_MAX,`
`75`	`75`	`)`
`76`	`76`
`77`		`-import pandas._libs.lib as lib`
	`77`	`+from pandas._libs import lib`
`78`	`78`
`79`	`79`	`from pandas._libs.khash cimport (`
`80`	`80`	`kh_destroy_float64,`