Skip to content

Commit 9c3f60b

Browse files
committed
PERF: improve perf of writing csv's with datetimes
1 parent 878d860 commit 9c3f60b

File tree

3 files changed

+62
-17
lines changed

3 files changed

+62
-17
lines changed

doc/source/whatsnew/v0.16.1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ API changes
8989
Performance Improvements
9090
~~~~~~~~~~~~~~~~~~~~~~~~
9191

92-
92+
- Improved csv write performance with mixed dtypes, including datetimes (:issue:`9940`)
9393

9494

9595

pandas/core/internals.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,23 +1870,12 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None,
18701870
values = self.values
18711871
if slicer is not None:
18721872
values = values[:, slicer]
1873-
mask = isnull(values)
18741873

1875-
rvalues = np.empty(values.shape, dtype=object)
1876-
if na_rep is None:
1877-
na_rep = 'NaT'
1878-
rvalues[mask] = na_rep
1879-
imask = (~mask).ravel()
1880-
1881-
if date_format is None:
1882-
date_formatter = lambda x: Timestamp(x)._repr_base
1883-
else:
1884-
date_formatter = lambda x: Timestamp(x).strftime(date_format)
1885-
1886-
rvalues.flat[imask] = np.array([date_formatter(val) for val in
1887-
values.ravel()[imask]], dtype=object)
1888-
1889-
return rvalues.tolist()
1874+
result = tslib.format_array_from_datetime(values.view('i8').ravel(),
1875+
tz=None,
1876+
format=date_format,
1877+
na_rep=na_rep).reshape(values.shape)
1878+
return result.tolist()
18901879

18911880
def should_store(self, value):
18921881
return issubclass(value.dtype.type, np.datetime64)

pandas/tslib.pyx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,6 +1398,62 @@ def parse_datetime_string(date_string, **kwargs):
13981398
dt = parse_date(date_string, **kwargs)
13991399
return dt
14001400

1401+
def format_array_from_datetime(ndarray[int64_t] values, object tz=None, object format=None, object na_rep=None):
    """
    Return a NumPy object array holding the string form of each value.

    Parameters
    ----------
    values : a 1-d i8 array
        entries equal to iNaT are treated as missing; all others are
        decoded at nanosecond resolution
    tz : the timezone (or None)
    format : optional, default is None
        a strftime capable string
    na_rep : optional, default is None
        the string stored for iNaT entries; 'NaT' is used when None

    Returns
    -------
    a 1-d object ndarray with one string per element of `values`

    Notes
    -----
    When neither `format` nor `tz` is given, the string is built directly
    from the decoded datetime struct without constructing a Timestamp:
    'YYYY-MM-DD HH:MM:SS', followed by a 6-digit fraction when only
    microseconds are non-zero, or a 9-digit fraction when nanoseconds are
    non-zero.
    """
    cdef:
        Py_ssize_t i, N = len(values)
        int64_t val, ns
        ndarray[object] result = np.empty(N, dtype=object)
        object ts, res
        _TSObject obj
        bint basic_format

    obj = _TSObject()

    if na_rep is None:
        na_rep = 'NaT'

    # loop-invariant: decide once whether the Timestamp-free path applies
    basic_format = format is None and tz is None

    for i in range(N):
        val = values[i]

        if val == iNaT:
            result[i] = na_rep
        elif basic_format:

            obj.value = val
            pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &obj.dts)
            res = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (obj.dts.year,
                                                   obj.dts.month,
                                                   obj.dts.day,
                                                   obj.dts.hour,
                                                   obj.dts.min,
                                                   obj.dts.sec)

            # the struct carries the sub-microsecond part in picoseconds;
            # floor-divide explicitly so the result stays an integer
            ns = obj.dts.ps // 1000

            if ns != 0:
                res += '.%.9d' % (ns + 1000 * obj.dts.us)
            elif obj.dts.us != 0:
                res += '.%.6d' % obj.dts.us

            result[i] = res

        else:
            ts = Timestamp(val, tz=tz)
            if format is None:
                # a tz was given without a format: strftime(None) would
                # raise TypeError, so use the Timestamp's own string form
                result[i] = str(ts)
            else:
                result[i] = ts.strftime(format)

    return result
1456+
14011457
def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
14021458
format=None, utc=None, coerce=False, unit=None):
14031459
cdef:

0 commit comments

Comments
 (0)