From fad69fbe3f7b1b8788a170faf544efca1ffe4ec6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 3 Nov 2021 14:56:03 -0500 Subject: [PATCH 1/9] feat: support conversion from pyarrow RecordBatch to pandas DataFrame --- db_dtypes/core.py | 3 +- tests/unit/test_arrow.py | 294 +++++++++++++++++++++------------------ 2 files changed, 157 insertions(+), 140 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index fbc784e..7f6da0f 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -17,6 +17,7 @@ import numpy import pandas from pandas._libs import NaT +import pandas.api.extensions import pandas.compat.numpy.function import pandas.core.algorithms import pandas.core.arrays @@ -32,7 +33,7 @@ pandas_release = pandas_backports.pandas_release -class BaseDatetimeDtype(pandas.core.dtypes.base.ExtensionDtype): +class BaseDatetimeDtype(pandas.api.extensions.ExtensionDtype): na_value = NaT kind = "o" names = None diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index d3745ea..82cf60d 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -13,160 +13,176 @@ # limitations under the License. import datetime as dt +from typing import Optional import pandas +import pandas.api.extensions +import pandas.testing import pyarrow import pytest -# To register the types. -import db_dtypes # noqa +import db_dtypes -@pytest.mark.parametrize( - ("series", "expected"), +def types_mapper( + pyarrow_type: pyarrow.DataType, +) -> Optional[pandas.api.extensions.ExtensionDtype]: + type_str = str(pyarrow_type) + + if type_str.startswith("date32") or type_str.startswith("date64"): + return db_dtypes.DateDtype + elif type_str.startswith("time32") or type_str.startswith("time64"): + return db_dtypes.TimeDtype + else: + # Use default type mapping. 
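+        # Returning None tells pyarrow to fall back to its built-in type
+        # mapping for this column (for example, int64 -> numpy int64).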
+ return None + + +SERIES_ARRAYS_DEFAULT_TYPES = [ + (pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date32())), ( - (pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date32())), - ( - pandas.Series([None, None, None], dtype="dbdate"), - pyarrow.array([None, None, None], type=pyarrow.date32()), - ), - ( - pandas.Series( - [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate" - ), - pyarrow.array( - [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], - type=pyarrow.date32(), - ), - ), - ( - pandas.Series( - [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], - dtype="dbdate", - ), - pyarrow.array( - [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], - type=pyarrow.date32(), - ), - ), - ( - pandas.Series([], dtype="dbtime"), - pyarrow.array([], type=pyarrow.time64("ns")), - ), - ( - pandas.Series([None, None, None], dtype="dbtime"), - pyarrow.array([None, None, None], type=pyarrow.time64("ns")), - ), - ( - pandas.Series( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], - dtype="dbtime", - ), - pyarrow.array( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], - type=pyarrow.time64("ns"), - ), - ), - ( - pandas.Series( - [ - dt.time(0, 0, 0, 0), - dt.time(12, 30, 15, 125_000), - dt.time(23, 59, 59, 999_999), - ], - dtype="dbtime", - ), - pyarrow.array( - [ - dt.time(0, 0, 0, 0), - dt.time(12, 30, 15, 125_000), - dt.time(23, 59, 59, 999_999), - ], - type=pyarrow.time64("ns"), - ), + pandas.Series([None, None, None], dtype="dbdate"), + pyarrow.array([None, None, None], type=pyarrow.date32()), + ), + ( + pandas.Series( + [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate" + ), + pyarrow.array( + [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], type=pyarrow.date32(), ), ), -) + ( + pandas.Series( + [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], + dtype="dbdate", + ), + pyarrow.array( + [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], + type=pyarrow.date32(), + ), + ), + (pandas.Series([], dtype="dbtime"), pyarrow.array([], type=pyarrow.time64("ns")),), + ( + pandas.Series([None, None, None], dtype="dbtime"), + pyarrow.array([None, None, None], type=pyarrow.time64("ns")), + ), + ( + pandas.Series( + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="dbtime", + ), + pyarrow.array( + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], + type=pyarrow.time64("ns"), + ), + ), + ( + pandas.Series( + [ + dt.time(0, 0, 0, 0), + dt.time(12, 30, 15, 125_000), + dt.time(23, 59, 59, 999_999), + ], + dtype="dbtime", + ), + pyarrow.array( + [ + dt.time(0, 0, 0, 0), + dt.time(12, 30, 15, 125_000), + dt.time(23, 59, 59, 999_999), + ], + type=pyarrow.time64("ns"), + ), + ), +] +SERIES_ARRAYS_CUSTOM_ARROW_TYPES = [ + (pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date64())), + ( + pandas.Series([None, None, None], dtype="dbdate"), + pyarrow.array([None, None, None], type=pyarrow.date64()), + ), + ( + pandas.Series( + [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate" + ), + pyarrow.array( + [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], type=pyarrow.date64(), + ), + ), + ( + pandas.Series( + [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], + dtype="dbdate", + ), + pyarrow.array( + [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], + type=pyarrow.date64(), + ), + ), + (pandas.Series([], dtype="dbtime"), pyarrow.array([], type=pyarrow.time32("ms")),), + ( + 
pandas.Series([None, None, None], dtype="dbtime"), + pyarrow.array([None, None, None], type=pyarrow.time32("ms")), + ), + ( + pandas.Series( + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], dtype="dbtime", + ), + pyarrow.array( + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], + type=pyarrow.time32("ms"), + ), + ), + ( + pandas.Series( + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="dbtime", + ), + pyarrow.array( + [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], + type=pyarrow.time64("us"), + ), + ), + ( + pandas.Series( + [ + dt.time(0, 0, 0, 0), + dt.time(12, 30, 15, 125_000), + dt.time(23, 59, 59, 999_999), + ], + dtype="dbtime", + ), + pyarrow.array( + [ + dt.time(0, 0, 0, 0), + dt.time(12, 30, 15, 125_000), + dt.time(23, 59, 59, 999_999), + ], + type=pyarrow.time64("us"), + ), + ), +] + + +@pytest.mark.parametrize(("series", "expected"), SERIES_ARRAYS_DEFAULT_TYPES) def test_to_arrow(series, expected): array = pyarrow.array(series) assert array.equals(expected) -@pytest.mark.parametrize( - ("series", "expected"), - ( - (pandas.Series([], dtype="dbdate"), pyarrow.array([], type=pyarrow.date64())), - ( - pandas.Series([None, None, None], dtype="dbdate"), - pyarrow.array([None, None, None], type=pyarrow.date64()), - ), - ( - pandas.Series( - [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="dbdate" - ), - pyarrow.array( - [dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], - type=pyarrow.date64(), - ), - ), - ( - pandas.Series( - [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], - dtype="dbdate", - ), - pyarrow.array( - [dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)], - type=pyarrow.date64(), - ), - ), - ( - pandas.Series([], dtype="dbtime"), - pyarrow.array([], type=pyarrow.time32("ms")), - ), - ( - pandas.Series([None, None, None], dtype="dbtime"), - pyarrow.array([None, None, None], type=pyarrow.time32("ms")), - ), - ( - pandas.Series( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], - dtype="dbtime", - ), - pyarrow.array( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], - type=pyarrow.time32("ms"), - ), - ), - ( - pandas.Series( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], - dtype="dbtime", - ), - pyarrow.array( - [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], - type=pyarrow.time64("us"), - ), - ), - ( - pandas.Series( - [ - dt.time(0, 0, 0, 0), - dt.time(12, 30, 15, 125_000), - dt.time(23, 59, 59, 999_999), - ], - dtype="dbtime", - ), - pyarrow.array( - [ - dt.time(0, 0, 0, 0), - dt.time(12, 30, 15, 125_000), - dt.time(23, 59, 59, 999_999), - ], - type=pyarrow.time64("us"), - ), - ), - ), -) +@pytest.mark.parametrize(("series", "expected"), SERIES_ARRAYS_CUSTOM_ARROW_TYPES) def test_to_arrow_w_arrow_type(series, expected): array = pyarrow.array(series, type=expected.type) assert array.equals(expected) + + +@pytest.mark.parametrize( + ["expected", "pyarrow_array"], + SERIES_ARRAYS_DEFAULT_TYPES + SERIES_ARRAYS_CUSTOM_ARROW_TYPES, +) +def test_from_arrow(pyarrow_array: pyarrow.Array, expected: pandas.Series): + # Convert to RecordBatch because types_mapper argument is ignored when + # using a pyarrow.Array. 
https://issues.apache.org/jira/browse/ARROW-9664 + record_batch = pyarrow.RecordBatch.from_arrays([pyarrow_array], ["test_col"]) + dataframe = record_batch.to_pandas(date_as_object=False, types_mapper=types_mapper) + series = dataframe["test_col"] + pandas.testing.assert_extension_array_equal(series, expected) From 7b95de3056f87cef42a5b9a788eee5e249555a08 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 3 Nov 2021 16:06:28 -0500 Subject: [PATCH 2/9] hack together working implementation TODO: add tests for constructing pandas Series with pyarrow scalars --- db_dtypes/__init__.py | 31 +++++++++++++++++++++++++++++-- db_dtypes/core.py | 5 +---- tests/unit/test_arrow.py | 2 +- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index bce2bf0..219fd9f 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -17,6 +17,7 @@ import datetime import re +from typing import Union import numpy import packaging.version @@ -52,6 +53,13 @@ class TimeDtype(core.BaseDatetimeDtype): def construct_array_type(self): return TimeArray + @staticmethod + def __from_arrow__( + array: Union[pyarrow.Array, pyarrow.ChunkedArray] + ) -> "TimeArray": + # TODO: Consider a more efficient conversion to "M8[ns]" numpy array. + return TimeArray(array) + class TimeArray(core.BaseDatetimeArray): """ @@ -75,7 +83,13 @@ def _datetime( r"(?:\.(?P\d*))?)?)?\s*$" ).match, ): - if isinstance(scalar, datetime.time): + # Convert pyarrow values to datetime.time. + if isinstance(scalar, (pyarrow.Time32Scalar, pyarrow.Time64Scalar)): + scalar = scalar.as_py() + + if scalar is None: + return None + elif isinstance(scalar, datetime.time): return datetime.datetime.combine(cls._epoch, scalar) elif isinstance(scalar, str): # iso string @@ -146,6 +160,13 @@ class DateDtype(core.BaseDatetimeDtype): def construct_array_type(self): return DateArray + @staticmethod + def __from_arrow__( + array: Union[pyarrow.Array, pyarrow.ChunkedArray] + ) -> "DateArray": + # TODO: Consider a more efficient conversion to "M8[ns]" numpy array. + return DateArray(array) + class DateArray(core.BaseDatetimeArray): """ @@ -161,7 +182,13 @@ def _datetime( scalar, match_fn=re.compile(r"\s*(?P\d+)-(?P\d+)-(?P\d+)\s*$").match, ): - if isinstance(scalar, datetime.date): + # Convert pyarrow values to datetime.date. 
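+        # A null pyarrow scalar converts to None via as_py() and is then
+        # handled by the `scalar is None` branch below.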
+ if isinstance(scalar, (pyarrow.Date32Scalar, pyarrow.Date64Scalar)): + scalar = scalar.as_py() + + if scalar is None: + return None + elif isinstance(scalar, datetime.date): return datetime.datetime(scalar.year, scalar.month, scalar.day) elif isinstance(scalar, str): match = match_fn(scalar) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 7f6da0f..c8f3ad4 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -61,10 +61,7 @@ def __init__(self, values, dtype=None, copy: bool = False): @classmethod def __ndarray(cls, scalars): - return numpy.array( - [None if scalar is None else cls._datetime(scalar) for scalar in scalars], - "M8[ns]", - ) + return numpy.array([cls._datetime(scalar) for scalar in scalars], "M8[ns]",) @classmethod def _from_sequence(cls, scalars, *, dtype=None, copy=False): diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index 82cf60d..4bac5b3 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -185,4 +185,4 @@ def test_from_arrow(pyarrow_array: pyarrow.Array, expected: pandas.Series): record_batch = pyarrow.RecordBatch.from_arrays([pyarrow_array], ["test_col"]) dataframe = record_batch.to_pandas(date_as_object=False, types_mapper=types_mapper) series = dataframe["test_col"] - pandas.testing.assert_extension_array_equal(series, expected) + pandas.testing.assert_series_equal(series, expected, check_names=False) From a009fc4cefa798655c5de087bb1e841b5aaa1979 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Nov 2021 10:25:28 -0500 Subject: [PATCH 3/9] fix unit test coverage, optimize arrow to numpy conversion --- db_dtypes/__init__.py | 34 +++++++++++++++++------ tests/unit/test_arrow.py | 60 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 10 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 219fd9f..acaa577 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -30,6 +30,7 @@ import pandas.core.dtypes.generic import pandas.core.nanops import pyarrow +import pyarrow.compute from db_dtypes.version import __version__ from db_dtypes import core @@ -37,6 +38,9 @@ date_dtype_name = "dbdate" time_dtype_name = "dbtime" +_EPOCH = datetime.datetime(1970, 1, 1) +_NPEPOCH = numpy.datetime64(_EPOCH) +_PAEPOCH = pyarrow.scalar(_EPOCH, pyarrow.timestamp("ns")) pandas_release = packaging.version.parse(pandas.__version__).release @@ -57,8 +61,21 @@ def construct_array_type(self): def __from_arrow__( array: Union[pyarrow.Array, pyarrow.ChunkedArray] ) -> "TimeArray": - # TODO: Consider a more efficient conversion to "M8[ns]" numpy array. - return TimeArray(array) + # We can't call combine_chunks on an empty array, so short-circuit the + # rest of the function logic for this special case. + if len(array) == 0: + return TimeArray(numpy.array([], dtype="datetime64[ns]")) + + # We can't cast to timestamp("ns"), but we time64("ns") has the same + # memory layout: 64-bit integers representing the number of nanoseconds + # since the datetime epoch (midnight 1970-01-01). + array = pyarrow.compute.cast(array, pyarrow.time64("ns")) + + # ChunkedArray has no "view" method, so combine into an Array. 
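+        # (pyarrow.compute.cast returns a ChunkedArray when given a
+        # ChunkedArray and a plain Array when given an Array.)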
+ array = array.combine_chunks() if hasattr(array, "combine_chunks") else array + array = array.view(pyarrow.timestamp("ns")) + np_array = array.to_numpy(zero_copy_only=False) + return TimeArray(np_array) class TimeArray(core.BaseDatetimeArray): @@ -69,8 +86,6 @@ class TimeArray(core.BaseDatetimeArray): # Data are stored as datetime64 values with a date of Jan 1, 1970 dtype = TimeDtype() - _epoch = datetime.datetime(1970, 1, 1) - _npepoch = numpy.datetime64(_epoch) @classmethod def _datetime( @@ -90,7 +105,7 @@ def _datetime( if scalar is None: return None elif isinstance(scalar, datetime.time): - return datetime.datetime.combine(cls._epoch, scalar) + return datetime.datetime.combine(_EPOCH, scalar) elif isinstance(scalar, str): # iso string parsed = match_fn(scalar) @@ -127,7 +142,7 @@ def _box_func(self, x): __return_deltas = {"timedelta", "timedelta64", "timedelta64[ns]", " "DateArray": - # TODO: Consider a more efficient conversion to "M8[ns]" numpy array. - return DateArray(array) + array = pyarrow.compute.cast(array, pyarrow.timestamp("ns")) + np_array = array.to_numpy() + return DateArray(np_array) class DateArray(core.BaseDatetimeArray): @@ -233,7 +249,7 @@ def __add__(self, other): return self.astype("object") + other if isinstance(other, TimeArray): - return (other._ndarray - other._npepoch) + self._ndarray + return (other._ndarray - _NPEPOCH) + self._ndarray return super().__add__(other) diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index 4bac5b3..66d3183 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -179,10 +179,68 @@ def test_to_arrow_w_arrow_type(series, expected): ["expected", "pyarrow_array"], SERIES_ARRAYS_DEFAULT_TYPES + SERIES_ARRAYS_CUSTOM_ARROW_TYPES, ) -def test_from_arrow(pyarrow_array: pyarrow.Array, expected: pandas.Series): +def test_series_from_arrow(pyarrow_array: pyarrow.Array, expected: pandas.Series): # Convert to RecordBatch because types_mapper argument is ignored when # using a pyarrow.Array. 
https://issues.apache.org/jira/browse/ARROW-9664 record_batch = pyarrow.RecordBatch.from_arrays([pyarrow_array], ["test_col"]) dataframe = record_batch.to_pandas(date_as_object=False, types_mapper=types_mapper) series = dataframe["test_col"] pandas.testing.assert_series_equal(series, expected, check_names=False) + + +@pytest.mark.parametrize( + ["expected", "pyarrow_array"], + SERIES_ARRAYS_DEFAULT_TYPES + SERIES_ARRAYS_CUSTOM_ARROW_TYPES, +) +def test_series_from_arrow_scalars( + pyarrow_array: pyarrow.Array, expected: pandas.Series +): + scalars = [] + for scalar in pyarrow_array: + scalars.append(scalar) + assert isinstance(scalar, pyarrow.Scalar) + series = pandas.Series(scalars, dtype=expected.dtype) + pandas.testing.assert_series_equal(series, expected) + + +def test_dataframe_from_arrow(): + record_batch = pyarrow.RecordBatch.from_arrays( + [ + pyarrow.array( + [dt.date(2021, 11, 4), dt.date(2038, 1, 20), None, dt.date(1970, 1, 1)], + type=pyarrow.date32(), + ), + pyarrow.array( + [ + dt.time(10, 7, 8, 995_325), + dt.time(23, 59, 59, 999_999), + None, + dt.time(0, 0, 0, 0), + ], + type=pyarrow.time64("us"), + ), + pyarrow.array([1, 2, 3, 4]), + ], + ["date_col", "time_col", "int_col"], + ) + dataframe = record_batch.to_pandas(date_as_object=False, types_mapper=types_mapper) + expected = pandas.DataFrame( + { + "date_col": pandas.Series( + [dt.date(2021, 11, 4), dt.date(2038, 1, 20), None, dt.date(1970, 1, 1)], + dtype="dbdate", + ), + "time_col": pandas.Series( + [ + dt.time(10, 7, 8, 995_325), + dt.time(23, 59, 59, 999_999), + None, + dt.time(0, 0, 0, 0), + ], + dtype="dbtime", + ), + "int_col": [1, 2, 3, 4], + }, + columns=["date_col", "time_col", "int_col"], + ) + pandas.testing.assert_frame_equal(dataframe, expected) From b4dd5cd991e0024f57fd93ae01282427c7acba79 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Nov 2021 10:36:51 -0500 Subject: [PATCH 4/9] apply same optimizations to to_arrow conversion --- db_dtypes/__init__.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index acaa577..8b47a18 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -40,7 +40,6 @@ time_dtype_name = "dbtime" _EPOCH = datetime.datetime(1970, 1, 1) _NPEPOCH = numpy.datetime64(_EPOCH) -_PAEPOCH = pyarrow.scalar(_EPOCH, pyarrow.timestamp("ns")) pandas_release = packaging.version.parse(pandas.__version__).release @@ -66,7 +65,7 @@ def __from_arrow__( if len(array) == 0: return TimeArray(numpy.array([], dtype="datetime64[ns]")) - # We can't cast to timestamp("ns"), but we time64("ns") has the same + # We can't cast to timestamp("ns"), but time64("ns") has the same # memory layout: 64-bit integers representing the number of nanoseconds # since the datetime epoch (midnight 1970-01-01). array = pyarrow.compute.cast(array, pyarrow.time64("ns")) @@ -157,9 +156,17 @@ def to_numpy(self, dtype="object"): return self.astype(dtype) def __arrow_array__(self, type=None): - return pyarrow.array( - self.to_numpy(dtype="object"), - type=type if type is not None else pyarrow.time64("ns"), + array = pyarrow.array(self._ndarray, type=pyarrow.timestamp("ns")) + + # ChunkedArray has no "view" method, so combine into an Array. + array = array.combine_chunks() if hasattr(array, "combine_chunks") else array + + # We can't cast to time64("ns"), but timestamp("ns") has the same + # memory layout: 64-bit integers representing the number of nanoseconds + # since the datetime epoch (midnight 1970-01-01). 
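+        # For example, 00:00:01 is stored as the timestamp
+        # 1970-01-01 00:00:01, i.e. the int64 value 1_000_000_000, which is
+        # also the time64("ns") encoding of 00:00:01.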
+ array = array.view(pyarrow.time64("ns")) + return pyarrow.compute.cast( + array, type if type is not None else pyarrow.time64("ns"), ) @@ -240,8 +247,9 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def __arrow_array__(self, type=None): - return pyarrow.array( - self._ndarray, type=type if type is not None else pyarrow.date32(), + array = pyarrow.array(self._ndarray, type=pyarrow.timestamp("ns")) + return pyarrow.compute.cast( + array, type if type is not None else pyarrow.date32(), ) def __add__(self, other): From 9c239a481ff55a38ab8f2b7519a376c1637b5905 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Nov 2021 11:03:09 -0500 Subject: [PATCH 5/9] remove redundant to_numpy now that to_arrow doesn't use it --- db_dtypes/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 8b47a18..7852319 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -150,11 +150,6 @@ def astype(self, dtype, copy=True): else: return super().astype(dtype, copy=copy) - if pandas_release < (1,): - - def to_numpy(self, dtype="object"): - return self.astype(dtype) - def __arrow_array__(self, type=None): array = pyarrow.array(self._ndarray, type=pyarrow.timestamp("ns")) From fe021fd33a7cb44b1548efc50e06c34288040bbb Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 4 Nov 2021 15:28:02 -0500 Subject: [PATCH 6/9] be explicit about chunked array vs array --- db_dtypes/__init__.py | 8 ++++++-- tests/unit/test_arrow.py | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 7852319..f4394f2 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -71,7 +71,9 @@ def __from_arrow__( array = pyarrow.compute.cast(array, pyarrow.time64("ns")) # ChunkedArray has no "view" method, so combine into an Array. - array = array.combine_chunks() if hasattr(array, "combine_chunks") else array + if isinstance(array, pyarrow.ChunkedArray): + array = array.combine_chunks() + array = array.view(pyarrow.timestamp("ns")) np_array = array.to_numpy(zero_copy_only=False) return TimeArray(np_array) @@ -154,7 +156,9 @@ def __arrow_array__(self, type=None): array = pyarrow.array(self._ndarray, type=pyarrow.timestamp("ns")) # ChunkedArray has no "view" method, so combine into an Array. 
-        array = array.combine_chunks() if hasattr(array, "combine_chunks") else array
- array = array.combine_chunks() if hasattr(array, "combine_chunks") else array + array = ( + array.combine_chunks() if isinstance(array, pyarrow.ChunkedArray) else array + ) # We can't cast to time64("ns"), but timestamp("ns") has the same # memory layout: 64-bit integers representing the number of nanoseconds diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index 66d3183..3eb4417 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -203,6 +203,29 @@ def test_series_from_arrow_scalars( pandas.testing.assert_series_equal(series, expected) +def test_dbtime_series_from_arrow_array(): + """Test to explicitly check Array -> Series conversion.""" + array = pyarrow.array([dt.time(15, 21, 0, 123_456)], type=pyarrow.time64("us")) + assert isinstance(array, pyarrow.Array) + assert not isinstance(array, pyarrow.ChunkedArray) + series = pandas.Series(db_dtypes.TimeDtype.__from_arrow__(array)) + expected = pandas.Series([dt.time(15, 21, 0, 123_456)], dtype="dbtime") + pandas.testing.assert_series_equal(series, expected) + + +def test_dbtime_series_from_arrow_chunkedarray(): + """Test to explicitly check ChunkedArray -> Series conversion.""" + array1 = pyarrow.array([dt.time(15, 21, 0, 123_456)], type=pyarrow.time64("us")) + array2 = pyarrow.array([dt.time(0, 0, 0, 0)], type=pyarrow.time64("us")) + array = pyarrow.chunked_array([array1, array2]) + assert isinstance(array, pyarrow.ChunkedArray) + series = pandas.Series(db_dtypes.TimeDtype.__from_arrow__(array)) + expected = pandas.Series( + [dt.time(15, 21, 0, 123_456), dt.time(0, 0, 0, 0)], dtype="dbtime" + ) + pandas.testing.assert_series_equal(series, expected) + + def test_dataframe_from_arrow(): record_batch = pyarrow.RecordBatch.from_arrays( [ From f042e83f210feee4d850683b74d6061953c14bee Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 8 Nov 2021 16:02:07 -0600 Subject: [PATCH 7/9] add docstrings to arrow conversion functions --- db_dtypes/__init__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index f4394f2..f954e09 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -60,6 +60,11 @@ def construct_array_type(self): def __from_arrow__( array: Union[pyarrow.Array, pyarrow.ChunkedArray] ) -> "TimeArray": + """Convert to dbtime data from an Arrow array. + + See: + https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow + """ # We can't call combine_chunks on an empty array, so short-circuit the # rest of the function logic for this special case. if len(array) == 0: @@ -153,6 +158,11 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def __arrow_array__(self, type=None): + """Convert to an Arrow array from dbtime data. + + See: + https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow + """ array = pyarrow.array(self._ndarray, type=pyarrow.timestamp("ns")) # ChunkedArray has no "view" method, so combine into an Array. @@ -185,6 +195,11 @@ def construct_array_type(self): def __from_arrow__( array: Union[pyarrow.Array, pyarrow.ChunkedArray] ) -> "DateArray": + """Convert to dbdate data from an Arrow array. 
+ + See: + https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow + """ array = pyarrow.compute.cast(array, pyarrow.timestamp("ns")) np_array = array.to_numpy() return DateArray(np_array) @@ -246,6 +261,11 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def __arrow_array__(self, type=None): + """Convert to an Arrow array from dbdate data. + + See: + https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow + """ array = pyarrow.array(self._ndarray, type=pyarrow.timestamp("ns")) return pyarrow.compute.cast( array, type if type is not None else pyarrow.date32(), From 5480c11d1a9a6e91f3da0056f2c9d1323ede217d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 8 Nov 2021 16:36:07 -0600 Subject: [PATCH 8/9] add test case for round-trip to/from pyarrow nanosecond-precision time scalars --- db_dtypes/__init__.py | 9 ++++++++- tests/unit/test_arrow.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index f954e09..f1424fb 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -106,12 +106,19 @@ def _datetime( ): # Convert pyarrow values to datetime.time. if isinstance(scalar, (pyarrow.Time32Scalar, pyarrow.Time64Scalar)): - scalar = scalar.as_py() + scalar = ( + scalar.cast(pyarrow.time64("ns")) + .cast(pyarrow.int64()) + .cast(pyarrow.timestamp("ns")) + .as_py() + ) if scalar is None: return None elif isinstance(scalar, datetime.time): return datetime.datetime.combine(_EPOCH, scalar) + elif isinstance(scalar, pandas.Timestamp): + return scalar.to_datetime64() elif isinstance(scalar, str): # iso string parsed = match_fn(scalar) diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index 3eb4417..2bd7a5a 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -24,6 +24,11 @@ import db_dtypes +SECOND_NANOS = 1_000_000_000 +MINUTE_NANOS = 60 * SECOND_NANOS +HOUR_NANOS = 60 * MINUTE_NANOS + + def types_mapper( pyarrow_type: pyarrow.DataType, ) -> Optional[pandas.api.extensions.ExtensionDtype]: @@ -160,6 +165,40 @@ def types_mapper( type=pyarrow.time64("us"), ), ), + ( + pandas.Series( + [ + # Only microseconds are supported when reading data. See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/19 + # Still, round-trip with pyarrow nanosecond precision scalars + # is supported. 
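+                # (TimeArray stores datetime64[ns] values internally, so
+                # nanosecond precision survives the pandas round-trip.)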
+ pyarrow.scalar(0, pyarrow.time64("ns")), + pyarrow.scalar( + 12 * HOUR_NANOS + + 30 * MINUTE_NANOS + + 15 * SECOND_NANOS + + 123_456_789, + pyarrow.time64("ns"), + ), + pyarrow.scalar( + 23 * HOUR_NANOS + + 59 * MINUTE_NANOS + + 59 * SECOND_NANOS + + 999_999_999, + pyarrow.time64("ns"), + ), + ], + dtype="dbtime", + ), + pyarrow.array( + [ + 0, + 12 * HOUR_NANOS + 30 * MINUTE_NANOS + 15 * SECOND_NANOS + 123_456_789, + 23 * HOUR_NANOS + 59 * MINUTE_NANOS + 59 * SECOND_NANOS + 999_999_999, + ], + type=pyarrow.time64("ns"), + ), + ), ] From a6b739623d93500aafb7ee2e1181b5779c1fa662 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 8 Nov 2021 16:40:25 -0600 Subject: [PATCH 9/9] add time32("ms") test case without nulls for completeness --- tests/unit/test_arrow.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index 2bd7a5a..5f45a90 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -138,6 +138,24 @@ def types_mapper( type=pyarrow.time32("ms"), ), ), + ( + pandas.Series( + [ + dt.time(0, 0, 0, 0), + dt.time(12, 30, 15, 125_000), + dt.time(23, 59, 59, 999_000), + ], + dtype="dbtime", + ), + pyarrow.array( + [ + dt.time(0, 0, 0, 0), + dt.time(12, 30, 15, 125_000), + dt.time(23, 59, 59, 999_000), + ], + type=pyarrow.time32("ms"), + ), + ), ( pandas.Series( [dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="dbtime",