Commit 6aa6979

Merge remote-tracking branch 'upstream/master' into fix-20432

2 parents 0df3a10 + 7d2f5ce

File tree: 107 files changed, +2209 −1027 lines


codecov.yml

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@ coverage:
   status:
     project:
       default:
-        target: '82'
+        target: '72'
     patch:
       default:
         target: '50'

doc/cheatsheet/Pandas_Cheat_Sheet.pdf

Binary file changed (9.56 KB → 9.14 KB); not shown.

doc/source/ecosystem.rst

Lines changed: 10 additions & 1 deletion

@@ -98,7 +98,8 @@ which can be used for a wide variety of time series data mining tasks.
 Visualization
 -------------
 
-While :ref:`pandas has built-in support for data visualization with matplotlib <visualization>`,
+`Pandas has its own Styler class for table visualization <user_guide/style.ipynb>`_, and while
+:ref:`pandas also has built-in support for data visualization through charts with matplotlib <visualization>`,
 there are a number of other pandas-compatible libraries.
 
 `Altair <https://altair-viz.github.io/>`__

@@ -368,6 +369,14 @@ far exceeding the performance of the native ``df.to_sql`` method. Internally, it
 Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
 Rigorously tested, it is a complete replacement for ``df.to_sql``.
 
+`Deltalake <https://pypi.org/project/deltalake>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Deltalake python package lets you access tables stored in
+`Delta Lake <https://delta.io/>`__ natively in Python without the need to use Spark or
+JVM. It provides the ``delta_table.to_pyarrow_table().to_pandas()`` method to convert
+any Delta table into Pandas dataframe.
 
 .. _ecosystem.out-of-core:

doc/source/user_guide/index.rst

Lines changed: 1 addition & 1 deletion

@@ -38,12 +38,12 @@ Further information on any specific method can be obtained in the
     integer_na
     boolean
     visualization
+    style
     computation
     groupby
     window
     timeseries
     timedeltas
-    style
     options
     enhancingperf
     scale

doc/source/user_guide/style.ipynb

Lines changed: 794 additions & 404 deletions (large diff; not rendered)

doc/source/user_guide/visualization.rst

Lines changed: 6 additions & 3 deletions

@@ -2,9 +2,12 @@
 
 {{ header }}
 
-*************
-Visualization
-*************
+*******************
+Chart Visualization
+*******************
+
+This section demonstrates visualization through charting. For information on
+visualization of tabular data please see the section on `Table Visualization <style.ipynb>`_.
 
 We use the standard convention for referencing the matplotlib API:

doc/source/user_guide/window.rst

Lines changed: 1 addition & 1 deletion

@@ -101,7 +101,7 @@ be calculated with :meth:`~Rolling.apply` by specifying a separate column of weights.
 
 All windowing operations support a ``min_periods`` argument that dictates the minimum amount of
 non-``np.nan`` values a window must have; otherwise, the resulting value is ``np.nan``.
-``min_peridos`` defaults to 1 for time-based windows and ``window`` for fixed windows
+``min_periods`` defaults to 1 for time-based windows and ``window`` for fixed windows
 
 .. ipython:: python
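The corrected ``min_periods`` behavior can be illustrated with a small sketch using only the documented rolling API:

```python
import pandas as pd

s = pd.Series([1.0, 2.0, None, 4.0, 5.0])

# For a fixed window, min_periods defaults to the window size, so any
# window with fewer than 3 non-NaN observations yields NaN. Here every
# 3-value window is either partial or contains the NaN, so all results
# are NaN.
strict = s.rolling(window=3).sum()

# Lowering min_periods lets partial windows produce a value:
# [1.0, 3.0, 3.0, 6.0, 9.0]
relaxed = s.rolling(window=3, min_periods=1).sum()
```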

doc/source/whatsnew/v1.3.0.rst

Lines changed: 59 additions & 0 deletions

@@ -110,6 +110,30 @@ both XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`)
 
 For more, see :ref:`io.xml` in the user guide on IO tools.
 
+.. _whatsnew_130.dataframe_honors_copy_with_dict:
+
+DataFrame constructor honors ``copy=False`` with dict
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When passing a dictionary to :class:`DataFrame` with ``copy=False``,
+a copy will no longer be made (:issue:`32960`)
+
+.. ipython:: python
+
+    arr = np.array([1, 2, 3])
+    df = pd.DataFrame({"A": arr, "B": arr.copy()}, copy=False)
+    df
+
+``df["A"]`` remains a view on ``arr``:
+
+.. ipython:: python
+
+    arr[0] = 0
+    assert df.iloc[0, 0] == 0
+
+The default behavior when not passing ``copy`` will remain unchanged, i.e.
+a copy will be made.
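Outside the ipython directive, the same behavior can be checked directly. A minimal sketch; the view semantics shown are those documented for pandas 1.3, before copy-on-write changed the defaults:

```python
import numpy as np
import pandas as pd

arr = np.array([1, 2, 3])
# copy=False asks the constructor to reuse the input arrays where possible,
# so "A" can stay backed by arr while "B" is built from an explicit copy.
df = pd.DataFrame({"A": arr, "B": arr.copy()}, copy=False)

# Mutating arr in place is then visible through df["A"] (in pandas 1.3),
# while df["B"] is unaffected because it was constructed from a copy.
arr[0] = 0
```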
 .. _whatsnew_130.enhancements.other:
 
 Other enhancements
@@ -302,6 +326,38 @@ cast to ``dtype=object`` (:issue:`38709`)
     ser2
 
+.. _whatsnew_130.notable_bug_fixes.rolling_groupby_column:
+
+GroupBy.rolling no longer returns grouped-by column in values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The group-by column will now be dropped from the result of a
+``groupby.rolling`` operation (:issue:`32262`)
+
+.. ipython:: python
+
+    df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
+    df
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [1]: df.groupby("A").rolling(2).sum()
+    Out[1]:
+           A    B
+    A
+    1 0  NaN  NaN
+      1  2.0  1.0
+    2 2  NaN  NaN
+    3 3  NaN  NaN
+
+*New behavior*:
+
+.. ipython:: python
+
+    df.groupby("A").rolling(2).sum()
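The new behavior can be verified directly with pandas 1.3 or later:

```python
import pandas as pd

df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})

# As of pandas 1.3, the grouped-by column "A" is dropped from the values;
# only "B" remains as a result column, with the group keys moved into the
# index.
result = df.groupby("A").rolling(2).sum()
```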
 .. _whatsnew_130.notable_bug_fixes.rolling_var_precision:
 
 Removed artificial truncation in rolling variance and standard deviation
@@ -501,6 +557,7 @@ Numeric
 - Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`)
 - Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
 - Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`)
+- Bug in ``rank`` method for :class:`Series`, :class:`DataFrame`, :class:`DataFrameGroupBy`, and :class:`SeriesGroupBy` treating the most negative ``int64`` value as missing (:issue:`32859`)
 - Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`)
 - Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` when passed argument ``func="size"`` would operate on the entire ``DataFrame`` instead of rows or columns (:issue:`39934`)
 - Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`)
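The :issue:`32859` fix above can be illustrated with the smallest ``int64`` value, on a pandas version that includes the fix:

```python
import numpy as np
import pandas as pd

most_negative = np.iinfo(np.int64).min

# Previously rank() treated this sentinel-like value as missing for plain
# integer data; with the fix it simply ranks as the smallest value.
s = pd.Series([3, most_negative, 1], dtype="int64")
ranks = s.rank()  # [3.0, 1.0, 2.0]
```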
@@ -513,6 +570,8 @@ Conversion
 - Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
 - Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
 - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`)
+- Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
+-
 
 Strings
 ^^^^^^^

pandas/_libs/algos.pyx

Lines changed: 23 additions & 65 deletions

@@ -794,68 +794,14 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
     return indexer
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
-    cdef:
-        Py_ssize_t i, N
-        algos_t val
-        uint8_t prev_mask
-        int lim, fill_count = 0
-
-    N = len(values)
-
-    # GH#2778
-    if N == 0:
-        return
-
-    lim = validate_limit(N, limit)
-
-    val = values[N - 1]
-    prev_mask = mask[N - 1]
-    for i in range(N - 1, -1, -1):
-        if mask[i]:
-            if fill_count >= lim:
-                continue
-            fill_count += 1
-            values[i] = val
-            mask[i] = prev_mask
-        else:
-            fill_count = 0
-            val = values[i]
-            prev_mask = mask[i]
+    pad_inplace(values[::-1], mask[::-1], limit=limit)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def backfill_2d_inplace(algos_t[:, :] values,
                         const uint8_t[:, :] mask,
                         limit=None):
-    cdef:
-        Py_ssize_t i, j, N, K
-        algos_t val
-        int lim, fill_count = 0
-
-    K, N = (<object>values).shape
-
-    # GH#2778
-    if N == 0:
-        return
-
-    lim = validate_limit(N, limit)
-
-    for j in range(K):
-        fill_count = 0
-        val = values[j, N - 1]
-        for i in range(N - 1, -1, -1):
-            if mask[j, i]:
-                if fill_count >= lim:
-                    continue
-                fill_count += 1
-                values[j, i] = val
-            else:
-                fill_count = 0
-                val = values[j, i]
+    pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit)
 
 
 @cython.boundscheck(False)
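The refactor above rests on the identity backfill(x) == reverse(pad(reverse(x))): because slicing with ``[::-1]`` yields a view, forward-filling the reversed view writes the backward fill into the original buffer. A pure-Python sketch of the same idea (``pad_inplace`` here is a simplified stand-in, not pandas' actual Cython implementation):

```python
import numpy as np

def pad_inplace(values, mask, limit=None):
    # Forward-fill in place: carry the last valid value into masked slots,
    # allowing at most `limit` consecutive fills.
    n = len(values)
    if n == 0:
        return
    lim = n if limit is None else limit
    val, prev_mask, fill_count = values[0], mask[0], 0
    for i in range(n):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
            mask[i] = prev_mask
        else:
            fill_count = 0
            val, prev_mask = values[i], mask[i]

def backfill_inplace(values, mask, limit=None):
    # Backward fill is forward fill on reversed views; values[::-1] is a
    # NumPy view, so the writes land in the original arrays.
    pad_inplace(values[::-1], mask[::-1], limit=limit)

vals = np.array([1.0, 0.0, 0.0, 4.0])
mask = np.array([False, True, True, False])
backfill_inplace(vals, mask)  # vals becomes [1.0, 4.0, 4.0, 4.0]
```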
@@ -962,6 +908,7 @@ ctypedef fused rank_t:
 def rank_1d(
     ndarray[rank_t, ndim=1] values,
     const intp_t[:] labels,
+    bint is_datetimelike=False,
     ties_method="average",
     bint ascending=True,
     bint pct=False,

@@ -977,17 +924,19 @@ def rank_1d(
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`. If not called
         from a groupby operation, will be an array of 0's
+    is_datetimelike : bool, default False
+        True if `values` contains datetime-like entries.
     ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
         'average'
         * average: average rank of group
         * min: lowest rank in group
         * max: highest rank in group
         * first: ranks assigned in order they appear in the array
         * dense: like 'min', but rank always increases by 1 between groups
-    ascending : boolean, default True
+    ascending : bool, default True
         False for ranks by high (1) to low (N)
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
-    pct : boolean, default False
+    pct : bool, default False
         Compute percentage rank of data within each group
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
         * keep: leave NA values where they are
@@ -1032,7 +981,7 @@ def rank_1d(
 
     if rank_t is object:
         mask = missing.isnaobj(masked_vals)
-    elif rank_t is int64_t:
+    elif rank_t is int64_t and is_datetimelike:
         mask = (masked_vals == NPY_NAT).astype(np.uint8)
     elif rank_t is float64_t:
         mask = np.isnan(masked_vals).astype(np.uint8)

@@ -1059,7 +1008,7 @@ def rank_1d(
         if rank_t is object:
             nan_fill_val = NegInfinity()
         elif rank_t is int64_t:
-            nan_fill_val = np.iinfo(np.int64).min
+            nan_fill_val = NPY_NAT
         elif rank_t is uint64_t:
             nan_fill_val = 0
         else:
@@ -1275,6 +1224,7 @@ def rank_1d(
 def rank_2d(
     ndarray[rank_t, ndim=2] in_arr,
     int axis=0,
+    bint is_datetimelike=False,
     ties_method="average",
     bint ascending=True,
     na_option="keep",

@@ -1299,7 +1249,9 @@ def rank_2d(
     tiebreak = tiebreakers[ties_method]
 
     keep_na = na_option == 'keep'
-    check_mask = rank_t is not uint64_t
+
+    # For cases where a mask is not possible, we can avoid mask checks
+    check_mask = not (rank_t is uint64_t or (rank_t is int64_t and not is_datetimelike))
 
     if axis == 0:
         values = np.asarray(in_arr).T.copy()

@@ -1310,28 +1262,34 @@ def rank_2d(
     if values.dtype != np.object_:
         values = values.astype('O')
 
-    if rank_t is not uint64_t:
+    if check_mask:
         if ascending ^ (na_option == 'top'):
             if rank_t is object:
                 nan_value = Infinity()
             elif rank_t is float64_t:
                 nan_value = np.inf
-            elif rank_t is int64_t:
+
+            # int64 and datetimelike
+            else:
                 nan_value = np.iinfo(np.int64).max
 
         else:
             if rank_t is object:
                 nan_value = NegInfinity()
             elif rank_t is float64_t:
                 nan_value = -np.inf
-            elif rank_t is int64_t:
+
+            # int64 and datetimelike
+            else:
                 nan_value = NPY_NAT
 
         if rank_t is object:
             mask = missing.isnaobj2d(values)
         elif rank_t is float64_t:
             mask = np.isnan(values)
-        elif rank_t is int64_t:
+
+        # int64 and datetimelike
+        else:
             mask = values == NPY_NAT
 
         np.putmask(values, mask, nan_value)