Commit b32c5a8

Merge branch 'master' into reduction_dtypes_II
2 parents: a276624 + c3f0aac


58 files changed: +940 -321 lines

.github/workflows/asv-bot.yml

Lines changed: 0 additions & 78 deletions
This file was deleted.

.github/workflows/comment-commands.yml

Lines changed: 67 additions & 2 deletions
@@ -11,18 +11,83 @@ permissions:
 jobs:
   issue_assign:
     runs-on: ubuntu-22.04
+    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
+    concurrency:
+      group: ${{ github.actor }}-issue-assign
     steps:
-      - if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
        run: |
          echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
   preview_docs:
     runs-on: ubuntu-22.04
+    if: github.event.issue.pull_request && github.event.comment.body == '/preview'
+    concurrency:
+      group: ${{ github.actor }}-preview-docs
     steps:
-      - if: github.event.issue.pull_request && github.event.comment.body == '/preview'
        run: |
          if curl --output /dev/null --silent --head --fail "https://pandas.pydata.org/preview/${{ github.event.issue.number }}/"; then
            curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"body": "Website preview of this PR available at: https://pandas.pydata.org/preview/${{ github.event.issue.number }}/"}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/comments
          else
            curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"body": "No preview found for PR #${{ github.event.issue.number }}. Did the docs build complete?"}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/comments
          fi
+  asv_run:
+    runs-on: ubuntu-22.04
+    # TODO: Support more benchmarking options later, against different branches, against self, etc
+    if: github.event.issue.pull_request && startsWith(github.event.comment.body, '@github-actions benchmark')
+    defaults:
+      run:
+        shell: bash -el {0}
+    env:
+      ENV_FILE: environment.yml
+      COMMENT: ${{github.event.comment.body}}
+
+    concurrency:
+      # Set concurrency to prevent abuse (full runs are ~5.5 hours !!!)
+      # each user can only run one concurrent benchmark bot at a time
+      # We don't cancel in progress jobs, but if you want to benchmark multiple PRs, you're gonna have
+      # to wait
+      group: ${{ github.actor }}-asv
+      cancel-in-progress: false
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      # Although asv sets up its own env, deps are still needed
+      # during discovery process
+      - name: Set up Conda
+        uses: ./.github/actions/setup-conda
+
+      - name: Run benchmarks
+        id: bench
+        continue-on-error: true # asv will exit code 1 for regressions
+        run: |
+          # extracting the regex, see https://stackoverflow.com/a/36798723
+          REGEX=$(echo "$COMMENT" | sed -n "s/^.*-b\s*\(\S*\).*$/\1/p")
+          cd asv_bench
+          asv check -E existing
+          git remote add upstream https://github.com/pandas-dev/pandas.git
+          git fetch upstream
+          asv machine --yes
+          asv continuous -f 1.1 -b $REGEX upstream/main HEAD
+          echo 'BENCH_OUTPUT<<EOF' >> $GITHUB_ENV
+          asv compare -f 1.1 upstream/main HEAD >> $GITHUB_ENV
+          echo 'EOF' >> $GITHUB_ENV
+          echo "REGEX=$REGEX" >> $GITHUB_ENV
+
+      - uses: actions/github-script@v6
+        env:
+          BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
+          REGEX: ${{env.REGEX}}
+        with:
+          script: |
+            const ENV_VARS = process.env
+            const run_url = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: '\nBenchmarks completed. View runner logs here.' + run_url + '\nRegex used: '+ 'regex ' + ENV_VARS["REGEX"] + '\n' + ENV_VARS["BENCH_OUTPUT"]
+            })
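The sed line in the benchmark step pulls the `-b` filter out of the triggering comment. For readers who don't speak sed, here is a rough Python sketch of that extraction (the sample comment is hypothetical; note sed's greedy `^.*` anchors to the last `-b`, while this sketch takes the first):

import re

# Hypothetical trigger comment posted on a PR
comment = "@github-actions benchmark -b groupby"

# Rough equivalent of: sed -n "s/^.*-b\s*\(\S*\).*$/\1/p"
match = re.search(r"-b\s*(\S*)", comment)
regex = match.group(1) if match else ""
print(regex)  # "groupby" -- later fed to `asv continuous -b $REGEX`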

.github/workflows/sdist.yml

Lines changed: 3 additions & 3 deletions
@@ -81,11 +81,11 @@ jobs:
         run: |
           case "${{matrix.python-version}}" in
           3.8)
-            pip install numpy==1.20.3 ;;
+            pip install numpy==1.21.6 ;;
           3.9)
-            pip install numpy==1.20.3 ;;
+            pip install numpy==1.21.6 ;;
           3.10)
-            pip install numpy==1.21.2 ;;
+            pip install numpy==1.21.6 ;;
           3.11)
             pip install numpy==1.23.2 ;;
           esac

ci/deps/actions-38-minimum_versions.yaml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ dependencies:

   # required dependencies
   - python-dateutil=2.8.2
-  - numpy=1.20.3
+  - numpy=1.21.6
   - pytz=2020.1

   # optional dependencies

doc/source/getting_started/install.rst

Lines changed: 1 addition & 1 deletion
@@ -260,7 +260,7 @@ pandas requires the following dependencies.
 ================================================================ ==========================
 Package                                                          Minimum supported version
 ================================================================ ==========================
-`NumPy <https://numpy.org>`__                                    1.20.3
+`NumPy <https://numpy.org>`__                                    1.21.6
 `python-dateutil <https://dateutil.readthedocs.io/en/stable/>`__ 2.8.2
 `pytz <https://pypi.org/project/pytz/>`__                        2020.1
 ================================================================ ==========================

doc/source/user_guide/groupby.rst

Lines changed: 1 addition & 1 deletion
@@ -196,7 +196,7 @@ only verifies that you've passed a valid mapping.
 GroupBy sorting
 ~~~~~~~~~~~~~~~~~~~~~~~~~

-By default the group keys are sorted during the ``groupby`` operation. You may however pass ``sort=False`` for potential speedups:
+By default the group keys are sorted during the ``groupby`` operation. You may however pass ``sort=False`` for potential speedups. With ``sort=False`` the order among group-keys follows the order of appearance of the keys in the original dataframe:

 .. ipython:: python
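The sentence added to the docs is easy to demonstrate. A minimal sketch (not part of the commit):

import pandas as pd

df = pd.DataFrame({"key": ["b", "a", "b", "c"], "val": [1, 2, 3, 4]})

# Default: group keys come back sorted
print(df.groupby("key").sum().index.tolist())              # ['a', 'b', 'c']

# sort=False: keys keep their order of first appearance
print(df.groupby("key", sort=False).sum().index.tolist())  # ['b', 'a', 'c']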
doc/source/user_guide/io.rst

Lines changed: 15 additions & 2 deletions
@@ -998,7 +998,7 @@ pass ``format='mixed'``

 .. ipython:: python

-   data = io.StringIO("date\n12 Jan 2000\n2000-01-13\n")
+   data = StringIO("date\n12 Jan 2000\n2000-01-13\n")
    df = pd.read_csv(data)
    df['date'] = pd.to_datetime(df['date'], format='mixed')
    df
@@ -1007,7 +1007,7 @@ or, if your datetime formats are all ISO8601 (possibly not identically-formatted

 .. ipython:: python

-   data = io.StringIO("date\n2020-01-01\n2020-01-01 03:00\n")
+   data = StringIO("date\n2020-01-01\n2020-01-01 03:00\n")
    df = pd.read_csv(data)
    df['date'] = pd.to_datetime(df['date'], format='ISO8601')
    df
@@ -2167,6 +2167,19 @@ Dates written in nanoseconds need to be read back in nanoseconds:
    dfju = pd.read_json(json, date_unit="ns")
    dfju

+By setting the ``dtype_backend`` argument you can control the default dtypes used for the resulting DataFrame.
+
+.. ipython:: python
+
+   data = (
+    '{"a":{"0":1,"1":3},"b":{"0":2.5,"1":4.5},"c":{"0":true,"1":false},"d":{"0":"a","1":"b"},'
+    '"e":{"0":null,"1":6.0},"f":{"0":null,"1":7.5},"g":{"0":null,"1":true},"h":{"0":null,"1":"a"},'
+    '"i":{"0":"12-31-2019","1":"12-31-2019"},"j":{"0":null,"1":null}}'
+   )
+   df = pd.read_json(StringIO(data), dtype_backend="pyarrow")
+   df
+   df.dtypes
+
 .. _io.json_normalize:

 Normalization
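Besides "pyarrow", ``dtype_backend`` also accepts "numpy_nullable". A minimal sketch (not part of the commit) contrasting the two backends on a column containing a null; the exact dtypes shown in the comments are the expected outcome, not output quoted from the docs:

import pandas as pd
from io import StringIO

data = '{"a":{"0":1,"1":null},"b":{"0":2.5,"1":4.5}}'

# Nullable NumPy-backed extension dtypes: "a" is expected to come back
# as Int64 holding pd.NA rather than float64 with NaN
print(pd.read_json(StringIO(data), dtype_backend="numpy_nullable").dtypes)

# PyArrow-backed dtypes: e.g. int64[pyarrow] and double[pyarrow]
print(pd.read_json(StringIO(data), dtype_backend="pyarrow").dtypes)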

doc/source/whatsnew/v2.0.1.rst

Lines changed: 3 additions & 1 deletion
@@ -15,6 +15,7 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression for subclassed Series when constructing from a dictionary (:issue:`52445`)
 - Fixed regression in :meth:`DataFrame.pivot` changing :class:`Index` name of input object (:issue:`52629`)
+- Fixed regression in :meth:`DataFrame.resample` raising on a DataFrame with no columns (:issue:`52484`)
 - Fixed regression in :meth:`DataFrame.sort_values` not resetting index when :class:`DataFrame` is already sorted and ``ignore_index=True`` (:issue:`52553`)
 - Fixed regression in :meth:`MultiIndex.isin` raising ``TypeError`` for ``Generator`` (:issue:`52568`)
 - Fixed regression in :meth:`Series.describe` showing ``RuntimeWarning`` for extension dtype :class:`Series` with one element (:issue:`52515`)
@@ -27,7 +28,8 @@ Bug fixes
 - Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`)
 - Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`)
 - Bug in :func:`Series.median` with :class:`ArrowDtype` returning an approximate median (:issue:`52679`)
-- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on-categorical dtypes (:issue:`49889`)
+- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on categorical dtypes (:issue:`49889`)
+- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on large string dtypes (:issue:`52795`)
 - Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`)
 - Bug in :func:`read_csv` casting PyArrow datetimes to NumPy when ``dtype_backend="pyarrow"`` and ``parse_dates`` is set causing a performance bottleneck in the process (:issue:`52546`)
 - Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with a :class:`ArrowDtype` (:issue:`52425`)

doc/source/whatsnew/v2.1.0.rst

Lines changed: 3 additions & 0 deletions
@@ -131,6 +131,8 @@ If installed, we now require:
 +-----------------+-----------------+----------+---------+
 | Package         | Minimum Version | Required | Changed |
 +=================+=================+==========+=========+
+| numpy           | 1.21.6          |    X     |    X    |
++-----------------+-----------------+----------+---------+
 | mypy (dev)      | 1.2             |          |    X    |
 +-----------------+-----------------+----------+---------+
 | beautifulsoup4  | 4.11.1          |          |    X    |
@@ -316,6 +318,7 @@ Conversion
 ^^^^^^^^^^
 - Bug in :func:`DataFrame.style.to_latex` and :func:`DataFrame.style.to_html` if the DataFrame contains integers with more digits than can be represented by floating point double precision (:issue:`52272`)
 - Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`)
+- Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`)
 - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`)
 -
pandas/_libs/tslibs/conversion.pyx

Lines changed: 1 addition & 2 deletions
@@ -61,7 +61,6 @@ from pandas._libs.tslibs.nattype cimport (
     c_nat_strings as nat_strings,
 )
 from pandas._libs.tslibs.parsing cimport parse_datetime_string
-from pandas._libs.tslibs.timestamps cimport _Timestamp
 from pandas._libs.tslibs.timezones cimport (
     get_utcoffset,
     is_utc,
@@ -761,7 +760,7 @@ cdef int64_t parse_pydatetime(
         _ts.ensure_reso(NPY_FR_ns)
         result = _ts.value
     else:
-        if isinstance(val, _Timestamp):
+        if isinstance(val, ABCTimestamp):
            result = val.as_unit("ns")._value
        else:
            result = pydatetime_to_dt64(val, dts)

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ from numpy cimport (

 cnp.import_array()

-from cpython.datetime cimport ( # alias bc `tzinfo` is a kwarg below
+from cpython.datetime cimport ( # alias tzinfo_type bc `tzinfo` is a kwarg below
     PyDate_Check,
     PyDateTime_Check,
     PyDelta_Check,

pandas/_libs/tslibs/util.pxd

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ cdef extern from "Python.h":
     bint PyComplex_Check(object obj) nogil
     bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil

+    # TODO(cython3): cimport this, xref GH#49670
     # Note that following functions can potentially raise an exception,
     # thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can
     # potentially allocate memory inside in unlikely case of when underlying

pandas/_typing.py

Lines changed: 43 additions & 0 deletions
@@ -132,6 +132,8 @@
 ]
 Timezone = Union[str, tzinfo]

+ToTimestampHow = Literal["s", "e", "start", "end"]
+
 # NDFrameT is stricter and ensures that the same subclass of NDFrame always is
 # used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
 # Series is passed into a function, a Series is always returned and if a DataFrame is
@@ -361,6 +363,9 @@ def closed(self) -> bool:
 SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
 NaPosition = Literal["first", "last"]

+# Arguments for nsmallest and nlargest
+NsmallestNlargestKeep = Literal["first", "last", "all"]
+
 # quantile interpolation
 QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"]

@@ -372,9 +377,32 @@ def closed(self) -> bool:

 # merge
 MergeHow = Literal["left", "right", "inner", "outer", "cross"]
+MergeValidate = Literal[
+    "one_to_one",
+    "1:1",
+    "one_to_many",
+    "1:m",
+    "many_to_one",
+    "m:1",
+    "many_to_many",
+    "m:m",
+]

 # join
 JoinHow = Literal["left", "right", "inner", "outer"]
+JoinValidate = Literal[
+    "one_to_one",
+    "1:1",
+    "one_to_many",
+    "1:m",
+    "many_to_one",
+    "m:1",
+    "many_to_many",
+    "m:m",
+]
+
+# reindex
+ReindexMethod = Union[FillnaOptions, Literal["nearest"]]

 MatplotlibColor = Union[str, Sequence[float]]
 TimeGrouperOrigin = Union[
@@ -400,3 +428,18 @@ def closed(self) -> bool:
     "backslashreplace",
     "namereplace",
 ]
+
+# update
+UpdateJoin = Literal["left"]
+
+# applymap
+NaAction = Literal["ignore"]
+
+# from_dict
+FromDictOrient = Literal["columns", "index", "tight"]
+
+# to_gbq
+ToGbqIfexist = Literal["fail", "replace", "append"]
+
+# to_stata
+ToStataByteorder = Literal[">", "<", "little", "big"]
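These aliases let each signature spell out its accepted strings once and get static checking for free. A minimal sketch of how such a Literal alias is consumed (the ``merge`` stub is illustrative, not pandas' real signature):

from typing import Literal, Optional

MergeValidate = Literal[
    "one_to_one", "1:1", "one_to_many", "1:m",
    "many_to_one", "m:1", "many_to_many", "m:m",
]

def merge(validate: Optional[MergeValidate] = None) -> None:
    """Toy stand-in for the ``validate`` parameter of a merge."""

merge(validate="m:1")   # OK
merge(validate="1-1")   # flagged by mypy/pyright: not a member of the Literal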
