pandas-dev
diff --git a/asv_bench/benchmarks/array.py
Lines changed: 41 additions & 0 deletions b/asv_bench/benchmarks/array.py
Lines changed: 41 additions & 0 deletions
diff --git a/asv_bench/benchmarks/reshape.py
Lines changed: 10 additions & 5 deletions b/asv_bench/benchmarks/reshape.py
Lines changed: 10 additions & 5 deletions
diff --git a/ci/deps/actions-38-downstream_compat.yaml
Lines changed: 0 additions & 1 deletion b/ci/deps/actions-38-downstream_compat.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/doc/source/development/contributing_environment.rst
Lines changed: 51 additions & 87 deletions b/doc/source/development/contributing_environment.rst
Lines changed: 51 additions & 87 deletions
diff --git a/doc/source/user_guide/io.rst
Lines changed: 3 additions & 1 deletion b/doc/source/user_guide/io.rst
Lines changed: 3 additions & 1 deletion
diff --git a/doc/source/whatsnew/v1.5.3.rst
Lines changed: 1 addition & 0 deletions b/doc/source/whatsnew/v1.5.3.rst
Lines changed: 1 addition & 0 deletions
diff --git a/doc/source/whatsnew/v2.0.0.rst
Lines changed: 8 additions & 1 deletion b/doc/source/whatsnew/v2.0.0.rst
Lines changed: 8 additions & 1 deletion
@@ -90,5 +90,46 @@ def time_setitem_list(self, multiple_chunks):
     def time_setitem_slice(self, multiple_chunks):
         self.array[::10] = "foo"
 
+    def time_setitem_null_slice(self, multiple_chunks):
+        self.array[:] = "foo"
+
     def time_tolist(self, multiple_chunks):
         self.array.tolist()
+
+
+class ArrowExtensionArray:
+
+    params = [
+        [
+            "boolean[pyarrow]",
+            "float64[pyarrow]",
+            "int64[pyarrow]",
+            "string[pyarrow]",
+            "timestamp[ns][pyarrow]",
+        ],
+        [False, True],
+    ]
+    param_names = ["dtype", "hasna"]
+
+    def setup(self, dtype, hasna):
+        N = 100_000
+        if dtype == "boolean[pyarrow]":
+            data = np.random.choice([True, False], N, replace=True)
+        elif dtype == "float64[pyarrow]":
+            data = np.random.randn(N)
+        elif dtype == "int64[pyarrow]":
+            data = np.arange(N)
+        elif dtype == "string[pyarrow]":
+            data = tm.rands_array(10, N)
+        elif dtype == "timestamp[ns][pyarrow]":
+            data = pd.date_range("2000-01-01", freq="s", periods=N)
+        else:
+            raise NotImplementedError
+
+        arr = pd.array(data, dtype=dtype)
+        if hasna:
+            arr[::2] = pd.NA
+        self.arr = arr
+
+    def time_to_numpy(self, dtype, hasna):
+        self.arr.to_numpy()
@@ -15,12 +15,17 @@
 
 
 class Melt:
-    def setup(self):
-        self.df = DataFrame(np.random.randn(10000, 3), columns=["A", "B", "C"])
-        self.df["id1"] = np.random.randint(0, 10, 10000)
-        self.df["id2"] = np.random.randint(100, 1000, 10000)
+    params = ["float64", "Float64"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        self.df = DataFrame(
+            np.random.randn(100_000, 3), columns=["A", "B", "C"], dtype=dtype
+        )
+        self.df["id1"] = pd.Series(np.random.randint(0, 10, 10000))
+        self.df["id2"] = pd.Series(np.random.randint(100, 1000, 10000))
 
-    def time_melt_dataframe(self):
+    def time_melt_dataframe(self, dtype):
         melt(self.df, id_vars=["id1", "id2"])
 
 
 
@@ -56,7 +56,6 @@ dependencies:
   - zstandard
 
   # downstream packages
-  - aiobotocore
   - botocore
   - cftime
   - dask
 
@@ -15,24 +15,11 @@ locally before pushing your changes. It's recommended to also install the :ref:`
 .. contents:: Table of contents:
    :local:
 
+Step 1: install a C compiler
+----------------------------
 
-Option 1: creating an environment without Docker
-------------------------------------------------
-
-Installing a C compiler
-~~~~~~~~~~~~~~~~~~~~~~~
-
-pandas uses C extensions (mostly written using Cython) to speed up certain
-operations. To install pandas from source, you need to compile these C
-extensions, which means you need a C compiler. This process depends on which
-platform you're using.
-
-If you have setup your environment using :ref:`mamba <contributing.mamba>`, the packages ``c-compiler``
-and ``cxx-compiler`` will install a fitting compiler for your platform that is
-compatible with the remaining mamba packages. On Windows and macOS, you will
-also need to install the SDKs as they have to be distributed separately.
-These packages will automatically be installed by using the ``pandas``
-``environment.yml`` file.
+How to do this will depend on your platform. If you choose to use ``Docker``
+in the next step, then you can skip this step.
 
 **Windows**
 
@@ -48,6 +35,9 @@ You will need `Build Tools for Visual Studio 2022
 Alternatively, you can install the necessary components on the commandline using
 `vs_BuildTools.exe <https://learn.microsoft.com/en-us/visualstudio/install/use-command-line-parameters-to-install-visual-studio?source=recommendations&view=vs-2022>`_
 
+Alternatively, you could use the `WSL <https://learn.microsoft.com/en-us/windows/wsl/install>`_
+and consult the ``Linux`` instructions below.
+
 **macOS**
 
 To use the :ref:`mamba <contributing.mamba>`-based compilers, you will need to install the
@@ -71,67 +61,40 @@ which compilers (and versions) are installed on your system::
 
 `GCC (GNU Compiler Collection) <https://gcc.gnu.org/>`_, is a widely used
 compiler, which supports C and a number of other languages. If GCC is listed
-as an installed compiler nothing more is required. If no C compiler is
-installed (or you wish to install a newer version) you can install a compiler
-(GCC in the example code below) with::
+as an installed compiler nothing more is required.
 
-    # for recent Debian/Ubuntu:
-    sudo apt install build-essential
-    # for Red Had/RHEL/CentOS/Fedora
-    yum groupinstall "Development Tools"
-
-For other Linux distributions, consult your favorite search engine for
-compiler installation instructions.
+If no C compiler is installed, or you wish to upgrade, or you're using a different
+Linux distribution, consult your favorite search engine for compiler installation/update
+instructions.
 
 Let us know if you have any difficulties by opening an issue or reaching out on our contributor
 community :ref:`Slack <community.slack>`.
 
-.. _contributing.mamba:
-
-Option 1a: using mamba (recommended)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Step 2: create an isolated environment
+----------------------------------------
 
-Now create an isolated pandas development environment:
+Before we begin, please:
 
-* Install `mamba <https://mamba.readthedocs.io/en/latest/installation.html>`_
-* Make sure your mamba is up to date (``mamba update mamba``)
 * Make sure that you have :any:`cloned the repository <contributing.forking>`
 * ``cd`` to the pandas source directory
 
-We'll now kick off a three-step process:
+.. _contributing.mamba:
+
+Option 1: using mamba (recommended)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-1. Install the build dependencies
-2. Build and install pandas
-3. Install the optional dependencies
+* Install `mamba <https://mamba.readthedocs.io/en/latest/installation.html>`_
+* Make sure your mamba is up to date (``mamba update mamba``)
 
 .. code-block:: none
 
    # Create and activate the build environment
    mamba env create --file environment.yml
    mamba activate pandas-dev
 
-   # Build and install pandas
-   python setup.py build_ext -j 4
-   python -m pip install -e . --no-build-isolation --no-use-pep517
-
-At this point you should be able to import pandas from your locally built version::
-
-   $ python
-   >>> import pandas
-   >>> print(pandas.__version__)  # note: the exact output may differ
-   1.5.0.dev0+1355.ge65a30e3eb.dirty
-
-This will create the new environment, and not touch any of your existing environments,
-nor any existing Python installation.
-
-To return to your root environment::
-
-      mamba deactivate
-
-Option 1b: using pip
-~~~~~~~~~~~~~~~~~~~~
+Option 2: using pip
+~~~~~~~~~~~~~~~~~~~
 
-If you aren't using mamba for your development environment, follow these instructions.
 You'll need to have at least the :ref:`minimum Python version <install.version>` that pandas supports.
 You also need to have ``setuptools`` 51.0.0 or later to build pandas.
 
@@ -150,10 +113,6 @@ You also need to have ``setuptools`` 51.0.0 or later to build pandas.
    # Install the build dependencies
    python -m pip install -r requirements-dev.txt
 
-   # Build and install pandas
-   python setup.py build_ext -j 4
-   python -m pip install -e . --no-build-isolation --no-use-pep517
-
 **Unix**/**macOS with pyenv**
 
 Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
@@ -162,7 +121,6 @@ Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
 
    # Create a virtual environment
    # Use an ENV_DIR of your choice. We'll use ~/Users/<yourname>/.pyenv/versions/pandas-dev
-
    pyenv virtualenv <version> <name-to-give-it>
 
    # For instance:
@@ -174,19 +132,15 @@ Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
    # Now install the build dependencies in the cloned pandas repo
    python -m pip install -r requirements-dev.txt
 
-   # Build and install pandas
-   python setup.py build_ext -j 4
-   python -m pip install -e . --no-build-isolation --no-use-pep517
-
 **Windows**
 
 Below is a brief overview on how to set-up a virtual environment with Powershell
 under Windows. For details please refer to the
 `official virtualenv user guide <https://virtualenv.pypa.io/en/latest/user_guide.html#activators>`__.
 
-Use an ENV_DIR of your choice. We'll use ~\\virtualenvs\\pandas-dev where
-'~' is the folder pointed to by either $env:USERPROFILE (Powershell) or
-%USERPROFILE% (cmd.exe) environment variable. Any parent directories
+Use an ENV_DIR of your choice. We'll use ``~\\virtualenvs\\pandas-dev`` where
+``~`` is the folder pointed to by either ``$env:USERPROFILE`` (Powershell) or
+``%USERPROFILE%`` (cmd.exe) environment variable. Any parent directories
 should already exist.
 
 .. code-block:: powershell
@@ -200,16 +154,10 @@ should already exist.
    # Install the build dependencies
    python -m pip install -r requirements-dev.txt
 
-   # Build and install pandas
-   python setup.py build_ext -j 4
-   python -m pip install -e . --no-build-isolation --no-use-pep517
-
-Option 2: creating an environment using Docker
-----------------------------------------------
+Option 3: using Docker
+~~~~~~~~~~~~~~~~~~~~~~
 
-Instead of manually setting up a development environment, you can use `Docker
-<https://docs.docker.com/get-docker/>`_ to automatically create the environment with just several
-commands. pandas provides a ``DockerFile`` in the root directory to build a Docker image
+pandas provides a ``DockerFile`` in the root directory to build a Docker image
 with a full pandas development environment.
 
 **Docker Commands**
@@ -226,13 +174,6 @@ Run Container::
     # but if not alter ${PWD} to match your local repo path
     docker run -it --rm -v ${PWD}:/home/pandas pandas-dev
 
-When inside the running container you can build and install pandas the same way as the other methods
-
-.. code-block:: bash
-
-   python setup.py build_ext -j 4
-   python -m pip install -e . --no-build-isolation --no-use-pep517
-
 *Even easier, you can integrate Docker with the following IDEs:*
 
 **Visual Studio Code**
@@ -246,3 +187,26 @@ See https://code.visualstudio.com/docs/remote/containers for details.
 Enable Docker support and use the Services tool window to build and manage images as well as
 run and interact with containers.
 See https://www.jetbrains.com/help/pycharm/docker.html for details.
+
+Step 3: build and install pandas
+--------------------------------
+
+You can now run::
+
+   # Build and install pandas
+   python setup.py build_ext -j 4
+   python -m pip install -e . --no-build-isolation --no-use-pep517
+
+At this point you should be able to import pandas from your locally built version::
+
+   $ python
+   >>> import pandas
+   >>> print(pandas.__version__)  # note: the exact output may differ
+   2.0.0.dev0+880.g2b9e661fbb.dirty
+
+This installs pandas into the environment you created in the previous step, and does not touch any of your other environments,
+nor any existing Python installation.
+
+.. note::
+   You will need to repeat this step each time the C extensions change, for example
+   if you modified any file in ``pandas/_libs`` or if you did a fetch and merge from ``upstream/main``.
@@ -471,7 +471,9 @@ Setting ``use_nullable_dtypes=True`` will result in nullable dtypes for every co
    3,4.5,False,b,6,7.5,True,a,12-31-2019,
    """
 
-   pd.read_csv(StringIO(data), use_nullable_dtypes=True, parse_dates=["i"])
+   df = pd.read_csv(StringIO(data), use_nullable_dtypes=True, parse_dates=["i"])
+   df
+   df.dtypes
 
 .. _io.categorical:
 
 
@@ -37,6 +37,7 @@ Bug fixes
 
 Other
 ~~~~~
+- Reverted deprecation (:issue:`45324`) of behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` slicing with an integer :class:`Index`; this will remain positional (:issue:`49612`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -44,7 +44,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
 Additionally a new global configuration, ``mode.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
 to select the nullable dtypes implementation.
 
-* :func:`read_csv` (with ``engine="pyarrow"``)
+* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
 * :func:`read_excel`
 * :func:`read_parquet`
 * :func:`read_orc`
@@ -738,6 +738,7 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`, :issue:`49577`)
 - Performance improvement in :meth:`MultiIndex.putmask` (:issue:`49830`)
 - Performance improvement in :meth:`Index.union` and :meth:`MultiIndex.union` when index contains duplicates (:issue:`48900`)
+- Performance improvement in :meth:`Series.rank` for pyarrow-backed dtypes (:issue:`50264`)
 - Performance improvement in :meth:`Series.fillna` for extension array dtypes (:issue:`49722`, :issue:`50078`)
 - Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
 - Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
@@ -750,6 +751,8 @@ Performance improvements
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
 - Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
+- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`)
+- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
 - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
 - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
 - Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
@@ -785,6 +788,7 @@ Datetimelike
 - Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`)
 - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
 - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`)
+- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing an empty string and a non-ISO8601 format was passed. Now, empty strings will be parsed as :class:`NaT`, for compatibility with how this is done for ISO8601 formats (:issue:`50251`)
 - Bug in :class:`Timestamp` was showing ``UserWarning``, which was not actionable by users, when parsing non-ISO8601 delimited date strings (:issue:`50232`)
 -
 
@@ -831,6 +835,7 @@ Interval
 
 Indexing
 ^^^^^^^^
+- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`)
 - Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
 - Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
 - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)
@@ -870,6 +875,7 @@ I/O
 - Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`)
 - Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`)
 - Bug when pickling a subset of PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`)
+- Bug in :func:`read_sql_query` ignoring ``dtype`` argument when ``chunksize`` is specified and result is empty (:issue:`50245`)
 - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
 - Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`)
 - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
@@ -906,6 +912,7 @@ Reshaping
 ^^^^^^^^^
 - Bug in :meth:`DataFrame.pivot_table` raising ``TypeError`` for nullable dtype and ``margins=True`` (:issue:`48681`)
 - Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`)
+- Bug in :meth:`DataFrame.melt` losing extension array dtype (:issue:`41570`)
 - Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`)
 - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`)
 - Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`)
Original file line number	Diff line number	Diff line change
`@@ -37,6 +37,7 @@ Bug fixes`
`37`	`37`
`38`	`38`	`Other`
`39`	`39`	`~~~~~`
	`40`	+- Reverted deprecation (:issue:`45324`) of behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` slicing with an integer :class:`Index`; this will remain positional (:issue:`49612`)
`40`	`41`	`-`
`41`	`42`
`42`	`43`	`.. ---------------------------------------------------------------------------`