pandas-dev
diff --git a/‎asv_bench/benchmarks/io/sql.py
Lines changed: 4 additions & 4 deletions b/‎asv_bench/benchmarks/io/sql.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎asv_bench/benchmarks/reshape.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/reshape.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/scripts/eval_performance.py
Lines changed: 108 additions & 0 deletions b/‎doc/scripts/eval_performance.py
Lines changed: 108 additions & 0 deletions
diff --git a/‎doc/source/_static/eval-perf-small.png
-24.7 KB b/‎doc/source/_static/eval-perf-small.png
-24.7 KB
diff --git a/‎doc/source/_static/eval-perf.png
10.8 KB b/‎doc/source/_static/eval-perf.png
10.8 KB
diff --git a/‎doc/source/_static/query-perf-small.png
-21.2 KB b/‎doc/source/_static/query-perf-small.png
-21.2 KB
diff --git a/‎doc/source/_static/query-perf.png
8.79 KB b/‎doc/source/_static/query-perf.png
8.79 KB
diff --git a/‎doc/source/conf.py
Lines changed: 1 addition & 1 deletion b/‎doc/source/conf.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/getting_started/intro_tutorials/09_timeseries.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/getting_started/intro_tutorials/09_timeseries.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/reference/indexing.rst
Lines changed: 0 additions & 2 deletions b/‎doc/source/reference/indexing.rst
Lines changed: 0 additions & 2 deletions
diff --git a/‎doc/source/reference/series.rst
Lines changed: 0 additions & 2 deletions b/‎doc/source/reference/series.rst
Lines changed: 0 additions & 2 deletions
diff --git a/‎doc/source/user_guide/categorical.rst
Lines changed: 2 additions & 12 deletions b/‎doc/source/user_guide/categorical.rst
Lines changed: 2 additions & 12 deletions
diff --git a/‎doc/source/user_guide/enhancingperf.rst
Lines changed: 5 additions & 19 deletions b/‎doc/source/user_guide/enhancingperf.rst
Lines changed: 5 additions & 19 deletions
diff --git a/‎doc/source/user_guide/indexing.rst
Lines changed: 19 additions & 5 deletions b/‎doc/source/user_guide/indexing.rst
Lines changed: 19 additions & 5 deletions
diff --git a/‎doc/source/whatsnew/v0.15.0.rst
Lines changed: 1 addition & 2 deletions b/‎doc/source/whatsnew/v0.15.0.rst
Lines changed: 1 addition & 2 deletions
diff --git a/‎doc/source/whatsnew/v0.19.0.rst
Lines changed: 2 additions & 2 deletions b/‎doc/source/whatsnew/v0.19.0.rst
Lines changed: 2 additions & 2 deletions
@@ -38,7 +38,7 @@ def setup(self, connection):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -88,7 +88,7 @@ def setup(self, connection, dtype):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -117,7 +117,7 @@ def setup(self):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -164,7 +164,7 @@ def setup(self, dtype):
             },
             index=tm.makeStringIndex(N),
         )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
         self.df["date"] = self.df["datetime"].dt.date
         self.df["time"] = self.df["datetime"].dt.time
         self.df["datetime_string"] = self.df["datetime"].astype(str)
 
@@ -36,7 +36,7 @@ def setup(self):
         self.df = DataFrame(data)
 
     def time_reshape_pivot_time_series(self):
-        self.df.pivot("date", "variable", "value")
+        self.df.pivot(index="date", columns="variable", values="value")
 
 
 class SimpleReshape:
 
@@ -0,0 +1,108 @@
+from timeit import repeat as timeit
+
+import numpy as np
+import seaborn as sns
+
+from pandas import DataFrame
+
+setup_common = """from pandas import DataFrame
+from numpy.random import randn
+df = DataFrame(randn(%d, 3), columns=list('abc'))
+%s"""
+
+setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"
+
+
+def bench_with(n, times=10, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                "df.eval(s, engine=%r)" % engine,
+                setup=setup_common % (n, setup_with),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'"
+
+
+def bench_subset(n, times=20, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                "df.query(s, engine=%r)" % engine,
+                setup=setup_common % (n, setup_subset),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+def bench(mn=3, mx=7, num=100, engines=("python", "numexpr"), verbose=False):
+    r = np.logspace(mn, mx, num=num).round().astype(int)
+
+    ev = DataFrame(np.empty((num, len(engines))), columns=engines)
+    qu = ev.copy(deep=True)
+
+    ev["size"] = qu["size"] = r
+
+    for engine in engines:
+        for i, n in enumerate(r):
+            if verbose & (i % 10 == 0):
+                print("engine: %r, i == %d" % (engine, i))
+            ev_times = bench_with(n, times=1, repeat=1, engine=engine)
+            ev.loc[i, engine] = np.mean(ev_times)
+            qu_times = bench_subset(n, times=1, repeat=1, engine=engine)
+            qu.loc[i, engine] = np.mean(qu_times)
+
+    return ev, qu
+
+
+def plot_perf(df, engines, title, filename=None):
+    from matplotlib.pyplot import figure
+
+    sns.set()
+    sns.set_palette("Set2")
+
+    fig = figure(figsize=(4, 3), dpi=120)
+    ax = fig.add_subplot(111)
+
+    for engine in engines:
+        ax.loglog(df["size"], df[engine], label=engine, lw=2)
+
+    ax.set_xlabel("Number of Rows")
+    ax.set_ylabel("Time (s)")
+    ax.set_title(title)
+    ax.legend(loc="best")
+    ax.tick_params(top=False, right=False)
+
+    fig.tight_layout()
+
+    if filename is not None:
+        fig.savefig(filename)
+
+
+if __name__ == "__main__":
+    import os
+
+    pandas_dir = os.path.dirname(
+        os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+    )
+    static_path = os.path.join(pandas_dir, "doc", "source", "_static")
+
+    join = lambda p: os.path.join(static_path, p)
+
+    fn = join("eval-query-perf-data.h5")
+
+    engines = "python", "numexpr"
+
+    ev, qu = bench(verbose=True)  # only this one
+
+    plot_perf(ev, engines, "DataFrame.eval()", filename=join("eval-perf.png"))
+    plot_perf(qu, engines, "DataFrame.query()", filename=join("query-perf.png"))
@@ -236,7 +236,7 @@
 if ".dev" in version:
     switcher_version = "dev"
 elif "rc" in version:
-    switcher_version = version.split("rc")[0] + " (rc)"
+    switcher_version = version.split("rc", maxsplit=1)[0] + " (rc)"
 
 html_theme_options = {
     "external_links": [],
 
@@ -144,7 +144,7 @@ I want to add a new column to the ``DataFrame`` containing only the month of the
 
 By using ``Timestamp`` objects for dates, a lot of time-related
 properties are provided by pandas. For example the ``month``, but also
-``year``, ``weekofyear``, ``quarter``,… All of these properties are
+``year``, ``quarter``,… All of these properties are
 accessible by the ``dt`` accessor.
 
 .. raw:: html
 
@@ -343,8 +343,6 @@ Time/date components
    DatetimeIndex.timetz
    DatetimeIndex.dayofyear
    DatetimeIndex.day_of_year
-   DatetimeIndex.weekofyear
-   DatetimeIndex.week
    DatetimeIndex.dayofweek
    DatetimeIndex.day_of_week
    DatetimeIndex.weekday
 
@@ -311,8 +311,6 @@ Datetime properties
    Series.dt.second
    Series.dt.microsecond
    Series.dt.nanosecond
-   Series.dt.week
-   Series.dt.weekofyear
    Series.dt.dayofweek
    Series.dt.day_of_week
    Series.dt.weekday
 
@@ -353,11 +353,6 @@ Renaming categories is done by using the
 
     In contrast to R's ``factor``, categorical data can have categories of other types than string.
 
-.. note::
-
-    Be aware that assigning new categories is an inplace operation, while most other operations
-    under ``Series.cat`` per default return a new ``Series`` of dtype ``category``.
-
 Categories must be unique or a ``ValueError`` is raised:
 
 .. ipython:: python
@@ -952,7 +947,6 @@ categorical (categories and ordering). So if you read back the CSV file you have
 relevant columns back to ``category`` and assign the right categories and categories ordering.
 
 .. ipython:: python
-    :okwarning:
 
     import io
 
@@ -969,8 +963,8 @@ relevant columns back to ``category`` and assign the right categories and catego
     df2["cats"]
     # Redo the category
     df2["cats"] = df2["cats"].astype("category")
-    df2["cats"].cat.set_categories(
-        ["very bad", "bad", "medium", "good", "very good"], inplace=True
+    df2["cats"] = df2["cats"].cat.set_categories(
+        ["very bad", "bad", "medium", "good", "very good"]
     )
     df2.dtypes
     df2["cats"]
@@ -1162,16 +1156,12 @@ Constructing a ``Series`` from a ``Categorical`` will not copy the input
 change the original ``Categorical``:
 
 .. ipython:: python
-    :okwarning:
 
     cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
     s = pd.Series(cat, name="cat")
     cat
     s.iloc[0:2] = 10
     cat
-    df = pd.DataFrame(s)
-    df["cat"].cat.categories = [1, 2, 3, 4, 5]
-    cat
 
 Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``:
 
 
@@ -690,21 +690,12 @@ The equivalent in standard Python would be
    df["a"] = 1
    df
 
-The :class:`DataFrame.query` method has a ``inplace`` keyword which determines
-whether the query modifies the original frame.
-
-.. ipython:: python
-
-   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-   df.query("a > 2")
-   df.query("a > 2", inplace=True)
-   df
-
 Local variables
 ~~~~~~~~~~~~~~~
 
 You must *explicitly reference* any local variable that you want to use in an
-expression by placing the ``@`` character in front of the name. For example,
+expression by placing the ``@`` character in front of the name. This mechanism is
+the same for both :meth:`DataFrame.query` and :meth:`DataFrame.eval`. For example,
 
 .. ipython:: python
 
@@ -820,17 +811,12 @@ significant performance benefit.  Here is a plot showing the running time of
 :func:`pandas.eval` as function of the size of the frame involved in the
 computation. The two lines are two different engines.
 
+..
+    The eval-perf.png figure below was generated with /doc/scripts/eval_performance.py
 
 .. image:: ../_static/eval-perf.png
 
-
-.. note::
-
-   Operations with smallish objects (around 15k-20k rows) are faster using
-   plain Python:
-
-       .. image:: ../_static/eval-perf-small.png
-
+You will only see the performance benefits of using the ``numexpr`` engine with :func:`pandas.eval` if your frame has more than approximately 100,000 rows.
 
 This plot was created using a :class:`DataFrame` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
 
@@ -1240,6 +1240,17 @@ If instead you don't want to or cannot name your index, you can use the name
    renaming your columns to something less ambiguous.
 
 
+The :meth:`DataFrame.query` method has an ``inplace`` keyword which determines
+whether the query modifies the original frame.
+
+.. ipython:: python
+
+   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
+   df.query("a > 2")
+   df.query("a > 2", inplace=True)
+   df
+
+
 :class:`~pandas.MultiIndex` :meth:`~pandas.DataFrame.query` Syntax
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1438,15 +1449,18 @@ Performance of :meth:`~pandas.DataFrame.query`
 ``DataFrame.query()`` using ``numexpr`` is slightly faster than Python for
 large frames.
 
+..
+    The query-perf.png figure below was generated with /doc/scripts/eval_performance.py
+
 .. image:: ../_static/query-perf.png
 
-.. note::
 
-   You will only see the performance benefits of using the ``numexpr`` engine
-   with ``DataFrame.query()`` if your frame has more than approximately 200,000
-   rows.
 
-      .. image:: ../_static/query-perf-small.png
+You will only see the performance benefits of using the ``numexpr`` engine
+with ``DataFrame.query()`` if your frame has more than approximately 100,000
+rows.
+
+
 
 This plot was created using a ``DataFrame`` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
 
@@ -70,7 +70,6 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
 :ref:`API documentation <api.arrays.categorical>`.
 
 .. ipython:: python
-    :okwarning:
 
     df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6],
                        "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
@@ -79,7 +78,7 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the
     df["grade"]
 
     # Rename the categories
-    df["grade"].cat.categories = ["very good", "good", "very bad"]
+    df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])
 
     # Reorder the categories and simultaneously add the missing categories
     df["grade"] = df["grade"].cat.set_categories(["very bad", "bad",
 
@@ -271,12 +271,12 @@ Individual columns can be parsed as a ``Categorical`` using a dict specification
    such as :func:`to_datetime`.
 
    .. ipython:: python
-      :okwarning:
 
       df = pd.read_csv(StringIO(data), dtype="category")
       df.dtypes
       df["col3"]
-      df["col3"].cat.categories = pd.to_numeric(df["col3"].cat.categories)
+      new_categories = pd.to_numeric(df["col3"].cat.categories)
+      df["col3"] = df["col3"].cat.rename_categories(new_categories)
       df["col3"]
 
 .. _whatsnew_0190.enhancements.union_categoricals: