diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 8d38c12252df4..70cfa3500f6b4 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -1184,11 +1184,9 @@ a single value and returning a single value. For example: df4 - def f(x): return len(str(x)) - df4["one"].map(f) df4.applymap(f) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 94a5f807d2262..e1aae0fd481b1 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -494,15 +494,12 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to S = pd.Series([i / 100.0 for i in range(1, 11)]) - def cum_ret(x, y): return x * (1 + y) - def red(x): return functools.reduce(cum_ret, x, 1.0) - S.expanding().apply(red, raw=True) @@ -514,12 +511,10 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]}) gb = df.groupby("A") - def replace(g): mask = g < 0 return g.where(mask, g[~mask].mean()) - gb.transform(replace) `Sort groups by aggregated data @@ -551,13 +546,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to rng = pd.date_range(start="2014-10-07", periods=10, freq="2min") ts = pd.Series(data=list(range(10)), index=rng) - def MyCust(x): if len(x) > 2: return x[1] * 1.234 return pd.NaT - mhc = {"Mean": np.mean, "Max": np.max, "Custom": MyCust} ts.resample("5min").apply(mhc) ts @@ -803,11 +796,9 @@ Apply index=["I", "II", "III"], ) - def SeriesFromSubList(aList): return pd.Series(aList) - df_orgz = pd.concat( {ind: row.apply(SeriesFromSubList) for ind, row in df.iterrows()} ) @@ -827,12 +818,10 @@ Rolling Apply to multiple columns where function calculates a Series before a Sc ) df - def gm(df, const): v = ((((df["A"] + df["B"]) + 1).cumprod()) - 1) * const return v.iloc[-1] - s = pd.Series( { df.index[i]: gm(df.iloc[i: min(i + 51, len(df) 
- 1)], 5) @@ -859,11 +848,9 @@ Rolling Apply to multiple columns where function returns a Scalar (Volume Weight ) df - def vwap(bars): return (bars.Close * bars.Volume).sum() / bars.Volume.sum() - window = 5 s = pd.concat( [ diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 3f596388ca226..ef6d45fa0140b 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1617,12 +1617,10 @@ column index name will be used as the name of the inserted column: } ) - def compute_metrics(x): result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()} return pd.Series(result, name="metrics") - result = df.groupby("a").apply(compute_metrics) result diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 5148bb87b0eb0..7f0cd613726dc 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -4648,11 +4648,9 @@ chunks. store.append("dfeq", dfeq, data_columns=["number"]) - def chunks(l, n): return [l[i: i + n] for i in range(0, len(l), n)] - evens = [2, 4, 6, 8, 10] coordinates = store.select_as_coordinates("dfeq", "number=evens") for c in chunks(coordinates, 2): diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index d8998a9a0a6e1..09b3d3a8c96df 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -1578,4 +1578,5 @@ to ``True``. You may also keep all the original values even if they are equal. .. 
ipython:: python + df.compare(df2, keep_shape=True, keep_equal=True) diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index 77cf43b2e2b19..7d1d03fe020a6 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -18,7 +18,6 @@ Reshaping by pivoting DataFrame objects import pandas._testing as tm - def unpivot(frame): N, K = frame.shape data = { @@ -29,7 +28,6 @@ Reshaping by pivoting DataFrame objects columns = ["date", "variable", "value"] return pd.DataFrame(data, columns=columns) - df = unpivot(tm.makeTimeDataFrame(3)) Data is often stored in so-called "stacked" or "record" format: diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst index e4eea57c43dbb..982a5b0a70b55 100644 --- a/doc/source/user_guide/sparse.rst +++ b/doc/source/user_guide/sparse.rst @@ -325,7 +325,6 @@ In the example below, we transform the ``Series`` to a sparse representation of row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True ) - A A.todense() rows diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index 9b1c9b8d04270..db9485f3f2348 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -297,24 +297,19 @@ positional argument (a regex object) and return a string. 
# Reverse every lowercase alphabetic word pat = r"[a-z]+" - def repl(m): return m.group(0)[::-1] - pd.Series(["foo 123", "bar baz", np.nan], dtype="string").str.replace( pat, repl, regex=True ) - # Using regex groups pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)" - def repl(m): return m.group("two").swapcase() - pd.Series(["Foo Bar Baz", np.nan], dtype="string").str.replace( pat, repl, regex=True ) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 01ff62d984544..6f005f912fe37 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1422,7 +1422,6 @@ An example of how holidays and holiday calendars are defined: MO, ) - class ExampleCalendar(AbstractHolidayCalendar): rules = [ USMemorialDay, @@ -1435,7 +1434,6 @@ An example of how holidays and holiday calendars are defined: ), ] - cal = ExampleCalendar() cal.holidays(datetime.datetime(2012, 1, 1), datetime.datetime(2012, 12, 31)) @@ -1707,13 +1705,11 @@ We can instead only resample those groups where we have points as follows: from functools import partial from pandas.tseries.frequencies import to_offset - def round(t, freq): # round a Timestamp to a specified freq freq = to_offset(freq) return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value) - ts.groupby(partial(round, freq="3T")).sum() .. _timeseries.aggregate: @@ -2255,11 +2251,9 @@ To convert from an ``int64`` based YYYYMMDD representation. s = pd.Series([20121231, 20141130, 99991231]) s - def conv(x): return pd.Period(year=x // 10000, month=x // 100 % 100, day=x % 100, freq="D") - s.apply(conv) s.apply(conv)[2] diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index b692685b90234..c8687f808a802 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -212,7 +212,6 @@ from present information back to past information. This allows the rolling windo df - ..
_window.custom_rolling_window: Custom window rolling @@ -294,13 +293,12 @@ conditions. In these cases it can be useful to perform forward-looking rolling w This :func:`BaseIndexer <pandas.api.indexers.BaseIndexer>` subclass implements a closed fixed-width forward-looking rolling window, and we can use it as follows: -.. ipython:: ipython +.. ipython:: python from pandas.api.indexers import FixedForwardWindowIndexer indexer = FixedForwardWindowIndexer(window_size=2) df.rolling(indexer, min_periods=1).sum() - .. _window.rolling_apply: Rolling apply @@ -319,7 +317,6 @@ the windows are cast as :class:`Series` objects (``raw=False``) or ndarray objec s = pd.Series(range(10)) s.rolling(window=4).apply(mad, raw=True) - .. _window.numba_engine: Numba engine