Skip to content

Commit 7373538

Browse files
committed
perf improvements for other native type writers
1 parent 07b39c8 commit 7373538

File tree

4 files changed: 42 additions and 22 deletions

doc/source/whatsnew/v0.16.1.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ API changes
8989
Performance Improvements
9090
~~~~~~~~~~~~~~~~~~~~~~~~
9191

92-
- Improved csv write performance with mixed dtypes, including datetimes (:issue:`9940`)
92+
- Improved csv write performance with mixed dtypes, including datetimes, by up to 5x (:issue:`9940`)
93+
- Improved general csv write performance by 2x (:issue:`9940`)
9394

9495

9596

pandas/core/format.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,7 +1258,8 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
12581258
if isinstance(cols, Index):
12591259
cols = cols.to_native_types(na_rep=na_rep,
12601260
float_format=float_format,
1261-
date_format=date_format)
1261+
date_format=date_format,
1262+
quoting=self.quoting)
12621263
else:
12631264
cols = list(cols)
12641265
self.obj = self.obj.loc[:, cols]
@@ -1269,7 +1270,8 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
12691270
if isinstance(cols, Index):
12701271
cols = cols.to_native_types(na_rep=na_rep,
12711272
float_format=float_format,
1272-
date_format=date_format)
1273+
date_format=date_format,
1274+
quoting=self.quoting)
12731275
else:
12741276
cols = list(cols)
12751277

@@ -1370,8 +1372,10 @@ def strftime_with_nulls(x):
13701372
values = self.obj.copy()
13711373
values.index = data_index
13721374
values.columns = values.columns.to_native_types(
1373-
na_rep=na_rep, float_format=float_format,
1374-
date_format=date_format)
1375+
na_rep=na_rep,
1376+
float_format=float_format,
1377+
date_format=date_format,
1378+
quoting=self.quoting)
13751379
values = values[cols]
13761380

13771381
series = {}
@@ -1542,18 +1546,22 @@ def _save_chunk(self, start_i, end_i):
15421546
slicer = slice(start_i, end_i)
15431547
for i in range(len(self.blocks)):
15441548
b = self.blocks[i]
1545-
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
1549+
d = b.to_native_types(slicer=slicer,
1550+
na_rep=self.na_rep,
15461551
float_format=self.float_format,
15471552
decimal=self.decimal,
1548-
date_format=self.date_format)
1553+
date_format=self.date_format,
1554+
quoting=self.quoting)
15491555

15501556
for col_loc, col in zip(b.mgr_locs, d):
15511557
# self.data is a preallocated list
15521558
self.data[col_loc] = col
15531559

1554-
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep,
1560+
ix = data_index.to_native_types(slicer=slicer,
1561+
na_rep=self.na_rep,
15551562
float_format=self.float_format,
1556-
date_format=self.date_format)
1563+
date_format=self.date_format,
1564+
quoting=self.quoting)
15571565

15581566
lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer)
15591567

pandas/core/index.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,10 +1071,14 @@ def to_native_types(self, slicer=None, **kwargs):
10711071
values = values[slicer]
10721072
return values._format_native_types(**kwargs)
10731073

1074-
def _format_native_types(self, na_rep='', **kwargs):
1074+
def _format_native_types(self, na_rep='', quoting=None, **kwargs):
10751075
""" actually format my specific types """
10761076
mask = isnull(self)
1077-
values = np.array(self, dtype=object, copy=True)
1077+
if not self.is_object() and not quoting:
1078+
values = np.asarray(self).astype(str)
1079+
else:
1080+
values = np.array(self, dtype=object, copy=True)
1081+
10781082
values[mask] = na_rep
10791083
return values.tolist()
10801084

pandas/core/internals.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -484,14 +484,19 @@ def _try_coerce_and_cast_result(self, result, dtype=None):
484484
def _try_fill(self, value):
485485
return value
486486

487-
def to_native_types(self, slicer=None, na_rep='', **kwargs):
487+
def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
488488
""" convert to our native types format, slicing if desired """
489489

490490
values = self.values
491491
if slicer is not None:
492492
values = values[:, slicer]
493-
values = np.array(values, dtype=object)
494493
mask = isnull(values)
494+
495+
if not self.is_object and not quoting:
496+
values = values.astype(str)
497+
else:
498+
values = np.array(values, dtype='object')
499+
495500
values[mask] = na_rep
496501
return values.tolist()
497502

@@ -1221,26 +1226,28 @@ def _try_cast(self, element):
12211226
return element
12221227

12231228
def to_native_types(self, slicer=None, na_rep='', float_format=None, decimal='.',
1224-
**kwargs):
1229+
quoting=None, **kwargs):
12251230
""" convert to our native types format, slicing if desired """
12261231

12271232
values = self.values
12281233
if slicer is not None:
12291234
values = values[:, slicer]
1230-
values = np.array(values, dtype=object)
12311235
mask = isnull(values)
1232-
values[mask] = na_rep
1233-
12341236

1237+
formatter = None
12351238
if float_format and decimal != '.':
12361239
formatter = lambda v : (float_format % v).replace('.',decimal,1)
12371240
elif decimal != '.':
12381241
formatter = lambda v : ('%g' % v).replace('.',decimal,1)
12391242
elif float_format:
12401243
formatter = lambda v : float_format % v
1244+
1245+
if formatter is None and not quoting:
1246+
values = values.astype(str)
12411247
else:
1242-
formatter = None
1248+
values = np.array(values, dtype='object')
12431249

1250+
values[mask] = na_rep
12441251
if formatter:
12451252
imask = (~mask).ravel()
12461253
values.flat[imask] = np.array(
@@ -1366,7 +1373,7 @@ def _try_coerce_result(self, result):
13661373
def should_store(self, value):
13671374
return issubclass(value.dtype.type, np.timedelta64)
13681375

1369-
def to_native_types(self, slicer=None, na_rep=None, **kwargs):
1376+
def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
13701377
""" convert to our native types format, slicing if desired """
13711378

13721379
values = self.values
@@ -1763,15 +1770,15 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
17631770
ndim=self.ndim,
17641771
placement=self.mgr_locs)
17651772

1766-
def to_native_types(self, slicer=None, na_rep='', **kwargs):
1773+
def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
17671774
""" convert to our native types format, slicing if desired """
17681775

17691776
values = self.values
17701777
if slicer is not None:
17711778
# Categorical is always one dimension
17721779
values = values[slicer]
1773-
values = np.array(values, dtype=object)
17741780
mask = isnull(values)
1781+
values = np.array(values).astype(str)
17751782
values[mask] = na_rep
17761783
# Blocks.to_native_type returns list of lists, but we are always only a list
17771784
return [values.tolist()]
@@ -1864,7 +1871,7 @@ def fillna(self, value, limit=None,
18641871
fastpath=True, placement=self.mgr_locs)]
18651872

18661873
def to_native_types(self, slicer=None, na_rep=None, date_format=None,
1867-
**kwargs):
1874+
quoting=None, **kwargs):
18681875
""" convert to our native types format, slicing if desired """
18691876

18701877
values = self.values

0 commit comments

Comments
 (0)