Skip to content

Commit d069b05

Browse files
committed
Address whatsnew changes & fix sql tests
1 parent f56859a commit d069b05

File tree

3 files changed

+27
-30
lines changed

3 files changed

+27
-30
lines changed

doc/source/whatsnew/v0.21.0.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,8 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the
303303
UTC Localization with Series
304304
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
305305

306+
Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` data type (:issue:`6415`).
307+
306308
Previous Behavior
307309

308310
.. ipython:: python
@@ -331,7 +333,7 @@ UTC Localization with Series
331333

332334
pd.to_datetime(s, utc=True)
333335

334-
This new behavior will also localize datetime columns in DataFrames returned from :func:`read_sql` which previously returned datetime columns as naive UTC.
336+
Additionally, datetime columns in DataFrames returned by :func:`read_sql` will be localized to UTC only if the original SQL columns were timezone-aware datetime columns.
335337

336338
.. _whatsnew_0210.api:
337339

pandas/io/sql.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,24 +99,24 @@ def _convert_params(sql, params):
9999
return args
100100

101101

102-
def _handle_date_column(col, format=None):
102+
def _handle_date_column(col, utc=None, format=None):
103103
if isinstance(format, dict):
104104
return to_datetime(col, errors='ignore', **format)
105105
else:
106106
if format in ['D', 's', 'ms', 'us', 'ns']:
107-
return to_datetime(col, errors='coerce', unit=format, utc=True)
107+
return to_datetime(col, errors='coerce', unit=format, utc=utc)
108108
elif (issubclass(col.dtype.type, np.floating) or
109109
issubclass(col.dtype.type, np.integer)):
110110
# parse dates as timestamp
111111
format = 's' if format is None else format
112-
return to_datetime(col, errors='coerce', unit=format, utc=True)
112+
return to_datetime(col, errors='coerce', unit=format, utc=utc)
113113
elif is_datetime64tz_dtype(col):
114114
# coerce to UTC timezone
115115
# GH11216
116116
return (to_datetime(col, errors='coerce')
117117
.astype('datetime64[ns, UTC]'))
118118
else:
119-
return to_datetime(col, errors='coerce', format=format, utc=True)
119+
return to_datetime(col, errors='coerce', format=format, utc=utc)
120120

121121

122122
def _parse_date_columns(data_frame, parse_dates):
@@ -818,10 +818,14 @@ def _harmonize_columns(self, parse_dates=None):
818818
df_col = self.frame[col_name]
819819
# the type the dataframe column should have
820820
col_type = self._get_dtype(sql_col.type)
821-
822821
if (col_type is datetime or col_type is date or
823822
col_type is DatetimeTZDtype):
824-
self.frame[col_name] = _handle_date_column(df_col)
823+
if col_type is DatetimeTZDtype:
824+
# Convert SQL Datetime columns with tz to UTC
825+
self.frame[col_name] = _handle_date_column(df_col,
826+
utc=True)
827+
else:
828+
self.frame[col_name] = _handle_date_column(df_col)
825829

826830
elif col_type is float:
827831
# floats support NA, can always convert!

pandas/tests/io/test_sql.py

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -606,15 +606,14 @@ def test_date_parsing(self):
606606
# No Parsing
607607
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
608608
assert not issubclass(df.DateCol.dtype.type, np.datetime64)
609-
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
610-
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
609+
611610
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
612611
parse_dates=['DateCol'])
613-
assert issubclass(df.DateCol.dtype.type, utc_dtype)
612+
assert issubclass(df.DateCol.dtype.type, np.datetime64)
614613

615614
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
616615
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
617-
assert issubclass(df.DateCol.dtype.type, utc_dtype)
616+
assert issubclass(df.DateCol.dtype.type, np.datetime64)
618617

619618
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
620619
parse_dates=['IntDateCol'])
@@ -632,9 +631,8 @@ def test_date_and_index(self):
632631
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
633632
index_col='DateCol',
634633
parse_dates=['DateCol', 'IntDateCol'])
635-
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
636-
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
637-
assert issubclass(df.index.dtype.type, utc_dtype)
634+
635+
assert issubclass(df.index.dtype.type, np.datetime64)
638636
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
639637

640638
def test_timedelta(self):
@@ -1323,15 +1321,14 @@ def check(col):
13231321
def test_date_parsing(self):
13241322
# No Parsing
13251323
df = sql.read_sql_table("types_test_data", self.conn)
1326-
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
1327-
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
1324+
13281325
df = sql.read_sql_table("types_test_data", self.conn,
13291326
parse_dates=['DateCol'])
1330-
assert issubclass(df.DateCol.dtype.type, utc_dtype)
1327+
assert issubclass(df.DateCol.dtype.type, np.datetime64)
13311328

13321329
df = sql.read_sql_table("types_test_data", self.conn,
13331330
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
1334-
assert issubclass(df.DateCol.dtype.type, utc_dtype)
1331+
assert issubclass(df.DateCol.dtype.type, np.datetime64)
13351332

13361333
df = sql.read_sql_table("types_test_data", self.conn, parse_dates={
13371334
'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}})
@@ -1357,11 +1354,7 @@ def test_datetime(self):
13571354
# with read_table -> type information from schema used
13581355
result = sql.read_sql_table('test_datetime', self.conn)
13591356
result = result.drop('index', axis=1)
1360-
# After GH 6415, dates outbound from a db will be localized to UTC
1361-
# xref GH 7364
1362-
expected = df.copy()
1363-
expected['A'] = expected['A'].dt.tz_localize('UTC')
1364-
tm.assert_frame_equal(result, expected)
1357+
tm.assert_frame_equal(result, df)
13651358

13661359
# with read_sql -> no type information -> sqlite has no native
13671360
result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn)
@@ -1381,11 +1374,7 @@ def test_datetime_NaT(self):
13811374

13821375
# with read_table -> type information from schema used
13831376
result = sql.read_sql_table('test_datetime', self.conn)
1384-
# After GH 6415, dates outbound from a db will be localized to UTC
1385-
# xref GH 7364
1386-
expected = df.copy()
1387-
expected['A'] = expected['A'].dt.tz_localize('UTC')
1388-
tm.assert_frame_equal(result, expected)
1377+
tm.assert_frame_equal(result, df)
13891378

13901379
# with read_sql -> no type information -> sqlite has no native
13911380
result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn)
@@ -1401,8 +1390,10 @@ def test_datetime_date(self):
14011390
df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
14021391
df.to_sql('test_date', self.conn, index=False)
14031392
res = read_sql_table('test_date', self.conn)
1404-
# GH 6415 comes back as datetime64[ns, UTC]
1405-
tm.assert_series_equal(res['a'], to_datetime(df['a'], utc=True))
1393+
expected = res['a']
1394+
result = to_datetime(df['a'])
1395+
# comes back as datetime64
1396+
tm.assert_series_equal(result, expected)
14061397

14071398
def test_datetime_time(self):
14081399
# test support for datetime.time

0 commit comments

Comments
 (0)