Add SQL Support for ADBC Drivers #53869
Changes from 29 commits
@@ -113,6 +113,8 @@ dependencies:
   - pygments  # Code highlighting

   - pip:
+    - adbc_driver_postgresql>=0.6.0
+    - adbc_driver_sqlite>=0.6.0
     - dataframe-api-compat>=0.1.7
     - sphinx-toggleprompt  # conda-forge version has stricter pins on jinja2
     - typing_extensions; python_version<"3.11"

Review comment: The conda packages should be available nowadays, so I think you can move these to the normal packages list.

Reply: Not yet for Windows, so I think we can leave this to a follow-up after apache/arrow-adbc#1149.
@@ -45,7 +45,10 @@
     is_dict_like,
     is_list_like,
 )
-from pandas.core.dtypes.dtypes import DatetimeTZDtype
+from pandas.core.dtypes.dtypes import (
+    ArrowDtype,
+    DatetimeTZDtype,
+)
 from pandas.core.dtypes.missing import isna

 from pandas import get_option
@@ -642,6 +645,17 @@ def read_sql(
        int_column date_column
     0           0  2012-11-10
     1           1  2010-11-12

+    .. versionadded:: 2.1.0
+
+    pandas now supports reading via ADBC drivers
+
+    >>> from adbc_driver_postgresql import dbapi
+    >>> with dbapi.connect('postgres:///db_name') as conn:  # doctest:+SKIP
+    ...     pd.read_sql('SELECT int_column FROM test_data', conn)
+       int_column
+    0           0
+    1           1
     """

     check_dtype_backend(dtype_backend)
@@ -850,6 +864,10 @@ def pandasSQL_builder( | |||||||||||
if sqlalchemy is not None and isinstance(con, (str, sqlalchemy.engine.Connectable)): | ||||||||||||
return SQLDatabase(con, schema, need_transaction) | ||||||||||||
|
||||||||||||
adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore") | ||||||||||||
if adbc and isinstance(con, adbc.Connection): | ||||||||||||
return ADBCDatabase(con) | ||||||||||||
|
||||||||||||
warnings.warn( | ||||||||||||
"pandas only supports SQLAlchemy connectable (engine/connection) or " | ||||||||||||
"database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 " | ||||||||||||
|
@@ -2024,6 +2042,255 @@ def _create_sql_schema(


# ---- SQL without SQLAlchemy ---


class ADBCDatabase(PandasSQL):
    """
    This class enables conversion between DataFrame and SQL databases
    using ADBC to handle database abstraction.

    Parameters
    ----------
    con : adbc_driver_manager.dbapi.Connection
    """

    def __init__(self, con) -> None:
        self.con = con

    def execute(self, sql: str | Select | TextClause, params=None):
        with self.con.cursor() as cur:
            return cur.execute(sql)

    def read_table(
        self,
        table_name: str,
        index_col: str | list[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        columns=None,
        schema: str | None = None,
        chunksize: int | None = None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ) -> DataFrame | Iterator[DataFrame]:
        """
        Read SQL database table into a DataFrame. The only keyword arguments
        used are table_name and schema; the rest raise NotImplementedError.

        Parameters
        ----------
        table_name : str
            Name of SQL table in database.
        schema : string, default None
            Name of SQL schema in database to read from.

        Returns
        -------
        DataFrame

        See Also
        --------
        pandas.read_sql_table
        SQLDatabase.read_query
        """
        if index_col:
            raise NotImplementedError("'index_col' is not implemented for ADBC drivers")
        if coerce_float is not True:
            raise NotImplementedError(
                "'coerce_float' is not implemented for ADBC drivers"
            )
        if parse_dates:
            raise NotImplementedError(
                "'parse_dates' is not implemented for ADBC drivers"
            )
        if columns:
            raise NotImplementedError("'columns' is not implemented for ADBC drivers")
Review comment: Doesn't necessarily need to happen for this initial PR, but since we are generating the SQL query string below, it should be relatively straightforward to support selecting a subset of columns instead of selecting everything.

Reply: Yeah, that's a good point. I can't remember why I didn't do this in the first place. Should be straightforward to add here or in a follow-up though, especially since ADBC should handle sanitizing.
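A rough sketch of what that could look like (a hypothetical helper, not part of this PR; it assumes identifiers that need no quoting):

def _build_select(table_name: str, schema: str | None, columns: list[str] | None) -> str:
    # Hypothetical: build the statement read_table would issue, selecting
    # only the requested columns instead of "*".
    select_list = ", ".join(columns) if columns else "*"
    target = f"{schema}.{table_name}" if schema else table_name
    return f"SELECT {select_list} FROM {target}"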
        if chunksize:
            raise NotImplementedError("'chunksize' is not implemented for ADBC drivers")
Review comment (on the chunksize check above): Again not for this PR, but something to note for future improvements: I think it should be possible to support chunksize? Because we can get a RecordBatchReader from ADBC, and then read from that iterator and convert to pandas in chunks.

Reply: It's a good question. I'm not sure I see anything in the ADBC specification around batch/chunk handling; I might be overlooking the general approach to that. @lidavidm always knows best.

Reply: Not necessarily in ADBC itself, but you should be able to get a RecordBatchReader instead of a materialized Table, and pyarrow then provides APIs to consume that reader chunk by chunk. It might not exactly support the user-specified chunksize, but it does give you a similar result: a generator of pandas DataFrames. (a RecordBatchReader allows you to ...)

Reply: Oops, I missed this. Drivers have various parameters to request a batch size, but perhaps we should standardize on one.
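A hedged sketch of that chunked-read idea (assumes the ADBC dbapi cursor's fetch_record_batch(), which returns a pyarrow.RecordBatchReader; chunk boundaries are driver-determined rather than matching the user's chunksize):

def _read_query_chunked(con, stmt: str, types_mapper=None):
    # Hypothetical generator: stream the result as DataFrame chunks
    # instead of materializing a single Arrow Table.
    with con.cursor() as cur:
        cur.execute(stmt)
        reader = cur.fetch_record_batch()  # pyarrow.RecordBatchReader
        for batch in reader:
            yield batch.to_pandas(types_mapper=types_mapper)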

        if schema:
            stmt = f"SELECT * FROM {schema}.{table_name}"
        else:
            stmt = f"SELECT * FROM {table_name}"
        mapping: type[ArrowDtype] | None | Callable
        if dtype_backend == "pyarrow":
            mapping = ArrowDtype
        elif dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            mapping = _arrow_dtype_mapping().get
        else:
            mapping = None

        with self.con.cursor() as cur:
            cur.execute(stmt)
            return cur.fetch_arrow_table().to_pandas(types_mapper=mapping)

Review comment (for another PR): we should probably factor out the mapping logic above into a helper that gives you the mapping based on the dtype_backend.
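A sketch of such a helper (hypothetical name and location; it simply hoists the branch above so both read paths could share it):

def _arrow_types_mapper(dtype_backend):
    # Hypothetical helper: return the types_mapper that to_pandas()
    # should use for a given dtype_backend.
    if dtype_backend == "pyarrow":
        return ArrowDtype
    if dtype_backend == "numpy_nullable":
        from pandas.io._util import _arrow_dtype_mapping

        return _arrow_dtype_mapping().get
    return None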
Review comment (suggested change): use cur.execute(stmt) instead of calling the cursor directly as cur(stmt). (I mentioned it before, but I don't fully understand how this PR is working, because if I try that locally with an adbc dbapi connection, I get "TypeError: 'Cursor' object is not callable".)

Reply: Ah, I think because you are basically only testing [...]. With a quick hacky patch I can get the tests to fail with the "'Cursor' object is not callable" error:

--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -146,7 +146,9 @@ def create_and_load_iris_sqlite3(conn: sqlite3.Connection, iris_file: Path):
         reader = csv.reader(csvfile)
         next(reader)
         stmt = "INSERT INTO iris VALUES(?, ?, ?, ?, ?)"
-        cur.executemany(stmt, reader)
+        cur.executemany(stmt, list(reader))
+    conn.commit()
+    cur.close()


 def create_and_load_iris(conn, iris_file: Path, dialect: str):
@@ -532,6 +534,23 @@ def sqlite_iris_conn(sqlite_iris_engine):
     yield conn


+@pytest.fixture
+def sqlite_iris_adbc_conn(iris_path):
+    if pa_version_under8p0:
+        pytest.skip("ADBC requires pyarrow >= 8.0.0")
+    pytest.importorskip("adbc_driver_sqlite")
+    from adbc_driver_sqlite import dbapi
+
+    with tm.ensure_clean() as name:
+        uri = f"file:{name}"
+        with dbapi.connect(uri) as conn:
+            create_and_load_iris_sqlite3(conn, iris_path)
+
+            yield conn
+            with conn.cursor() as cur:
+                cur.execute("DROP TABLE IF EXISTS test_frame")
+
+
 @pytest.fixture
 def sqlite_buildin():
     with contextlib.closing(sqlite3.connect(":memory:")) as closing_conn:
@@ -566,6 +585,7 @@ sqlite_iris_connectable = [
     "sqlite_iris_engine",
     "sqlite_iris_conn",
     "sqlite_iris_str",
+    "sqlite_iris_adbc_conn",
 ]

 sqlalchemy_connectable = mysql_connectable + postgresql_connectable + sqlite_connectable

Reply: Ah cool - nice find

    def read_query(
        self,
        sql: str,
        index_col: str | list[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        params=None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    ) -> DataFrame | Iterator[DataFrame]:
        """
        Read SQL query into a DataFrame. Keyword arguments other than sql
        and dtype_backend raise NotImplementedError.

        Parameters
        ----------
        sql : str
            SQL query to be executed.

        Returns
        -------
        DataFrame

        See Also
        --------
        read_sql_table : Read SQL database table into a DataFrame.
        read_sql
        """
        if index_col:
            raise NotImplementedError("'index_col' is not implemented for ADBC drivers")
        if coerce_float is not True:
            raise NotImplementedError(
                "'coerce_float' is not implemented for ADBC drivers"
            )
        if parse_dates:
            raise NotImplementedError(
                "'parse_dates' is not implemented for ADBC drivers"
            )
        if params:
            raise NotImplementedError("'params' is not implemented for ADBC drivers")
        if chunksize:
            raise NotImplementedError("'chunksize' is not implemented for ADBC drivers")
        if dtype:
            raise NotImplementedError("'dtype' is not implemented for ADBC drivers")

        mapping: type[ArrowDtype] | None | Callable
        if dtype_backend == "pyarrow":
            mapping = ArrowDtype
        elif dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            mapping = _arrow_dtype_mapping().get
        else:
            mapping = None

        with self.con.cursor() as cur:
            cur.execute(sql)
            return cur.fetch_arrow_table().to_pandas(types_mapper=mapping)

    read_sql = read_query
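
End to end, the dtype_backend plumbing above can be exercised like this (a hedged example; file and column names are made up):

import pandas as pd
from adbc_driver_sqlite import dbapi

with dbapi.connect("file:demo.db") as conn:
    df = pd.read_sql("SELECT 1 AS a", conn, dtype_backend="pyarrow")
    # df["a"] is now backed by pandas' ArrowDtype rather than a numpy dtype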

    def to_sql(
        self,
        frame,
        name: str,
        if_exists: Literal["fail", "replace", "append"] = "fail",
        index: bool = True,
        index_label=None,
        schema: str | None = None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
        method: Literal["multi"] | Callable | None = None,
        engine: str = "auto",
        **engine_kwargs,
    ) -> int | None:
        """
        Write records stored in a DataFrame to a SQL database.
        Only frame, name, if_exists, index and schema are used; the other
        arguments raise NotImplementedError.

        Parameters
        ----------
        frame : DataFrame
        name : string
            Name of SQL table.
        if_exists : {'fail', 'replace', 'append'}, default 'fail'
            - fail: If table exists, raise a ValueError.
            - replace: If table exists, drop it, recreate it, and insert data.
            - append: If table exists, insert data. Create if does not exist.
        schema : string, default None
            Name of SQL schema in database to write to (if database flavor
            supports this). If specified, this overwrites the default
            schema of the SQLDatabase object.
        """
        if index_label:
            raise NotImplementedError(
                "'index_label' is not implemented for ADBC drivers"
            )
        if chunksize:
            raise NotImplementedError("'chunksize' is not implemented for ADBC drivers")
        if dtype:
            raise NotImplementedError("'dtype' is not implemented for ADBC drivers")
        if method:
            raise NotImplementedError("'method' is not implemented for ADBC drivers")
        if engine != "auto":
            raise NotImplementedError("'engine' is not implemented for ADBC drivers")

        if schema:
            table_name = f"{schema}.{name}"
        else:
            table_name = name

        # TODO: pandas if_exists="append" will still create the
        # table if it does not exist; ADBC has append/create
        # as applicable modes, so the semantics get blurred across
        # the libraries
        mode = "create"
        if self.has_table(name, schema):
            if if_exists == "fail":
                raise ValueError(f"Table '{table_name}' already exists.")
            elif if_exists == "replace":
                with self.con.cursor() as cur:
                    cur.execute(f"DROP TABLE {table_name}")
            elif if_exists == "append":
                mode = "append"

        import pyarrow as pa

        tbl = pa.Table.from_pandas(frame, preserve_index=index)
        with self.con.cursor() as cur:
            total_inserted = cur.adbc_ingest(table_name, tbl, mode=mode)

        self.con.commit()
        return total_inserted
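A brief usage sketch of the write path and its mode mapping (file and table names are made up; assumes adbc_driver_sqlite is installed):

import pandas as pd
from adbc_driver_sqlite import dbapi

df = pd.DataFrame({"a": [1, 2, 3]})
with dbapi.connect("file:demo.db") as conn:
    df.to_sql("demo", conn, index=False)                      # ingested with mode="create"
    df.to_sql("demo", conn, if_exists="append", index=False)  # ingested with mode="append"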

    def has_table(self, name: str, schema: str | None = None) -> bool:
        meta = self.con.adbc_get_objects(
            db_schema_filter=schema, table_name_filter=name
        ).read_all()

        for catalog_schema in meta["catalog_db_schemas"].to_pylist():
            if not catalog_schema:
                continue
            for schema_record in catalog_schema:
                if not schema_record:
                    continue

                for table_record in schema_record["db_schema_tables"]:
                    if table_record["table_name"] == name:
                        return True

        return False
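For orientation, one element of meta["catalog_db_schemas"].to_pylist() walked above looks roughly like this (field names follow the ADBC GetObjects schema; the values are illustrative only):

[
    {
        "db_schema_name": "main",
        "db_schema_tables": [
            {"table_name": "test_frame", "table_type": "table"},
        ],
    },
]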

    def _create_sql_schema(
        self,
        frame: DataFrame,
        table_name: str,
        keys: list[str] | None = None,
        dtype: DtypeArg | None = None,
        schema: str | None = None,
    ):
        raise NotImplementedError("not implemented for adbc")


# sqlite-specific sql strings and handler class
# dictionary used for readability purposes
_SQL_TYPES = {