Skip to content

Commit 64c7214

Browse files
chris-b1 authored and jorisvandenbossche committed
add test/fix for dtype=object
1 parent 5462774 commit 64c7214

File tree

2 files changed: 17 additions and 13 deletions

2 files changed: 17 additions and 13 deletions

pandas/io/parsers.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1304,7 +1304,7 @@ def _apply_converter(self, values, conv_f, na_values, col_na_values,
13041304

13051305
cvals, na_count = self._infer_types(
13061306
values, set(col_na_values) | col_na_fvalues,
1307-
try_numeric=False)
1307+
try_num_bool=False)
13081308
return cvals, na_count
13091309

13101310
def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
@@ -1331,10 +1331,15 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13311331
values, conv_f, na_values,
13321332
col_na_values, col_na_fvalues)
13331333
else:
1334+
try_num_bool = True
1335+
if cast_type and is_object_dtype(cast_type):
1336+
# skip inference if specified dtype is object
1337+
try_num_bool = False
1338+
13341339
# general type inference and conversion
13351340
cvals, na_count = self._infer_types(
13361341
values, set(col_na_values) | col_na_fvalues,
1337-
try_numeric=True)
1342+
try_num_bool)
13381343

13391344
if issubclass(cvals.dtype.type, np.integer) and self.compact_ints:
13401345
cvals = lib.downcast_int64(
@@ -1357,7 +1362,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
13571362
print('Filled %d NA values in column %s' % (na_count, str(c)))
13581363
return result
13591364

1360-
def _infer_types(self, values, na_values, try_numeric=True):
1365+
def _infer_types(self, values, na_values, try_num_bool=True):
13611366
na_count = 0
13621367
if issubclass(values.dtype.type, (np.number, np.bool_)):
13631368
mask = lib.ismember(values, na_values)
@@ -1368,7 +1373,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
13681373
np.putmask(values, mask, np.nan)
13691374
return values, na_count
13701375

1371-
if try_numeric:
1376+
if try_num_bool:
13721377
try:
13731378
result = lib.maybe_convert_numeric(values, na_values, False)
13741379
na_count = isnull(result).sum()
@@ -1381,7 +1386,7 @@ def _infer_types(self, values, na_values, try_numeric=True):
13811386
if values.dtype == np.object_:
13821387
na_count = lib.sanitize_objects(values, na_values, False)
13831388

1384-
if result.dtype == np.object_ and try_numeric:
1389+
if result.dtype == np.object_ and try_num_bool:
13851390
result = lib.maybe_convert_bool(values,
13861391
true_values=self.true_values,
13871392
false_values=self.false_values)

pandas/io/tests/parser/dtypes.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,12 @@
55
for all of the parsers defined in parsers.py
66
"""
77

8-
from datetime import datetime
9-
10-
import nose
11-
128
import numpy as np
139
import pandas as pd
1410
import pandas.util.testing as tm
1511

16-
from pandas.lib import Timestamp
1712
from pandas import DataFrame, Series, Index, MultiIndex, Categorical
18-
from pandas.compat import parse_date, StringIO, lmap
13+
from pandas.compat import StringIO
1914
from pandas.types.dtypes import CategoricalDtype
2015

2116

@@ -30,8 +25,12 @@ def test_passing_dtype(self):
3025

3126
# see gh-3795: passing 'str' as the dtype
3227
result = self.read_csv(path, dtype=str, index_col=0)
33-
tm.assert_series_equal(result.dtypes, Series(
34-
{'A': 'object', 'B': 'object'}))
28+
expected = df.astype(str)
29+
tm.assert_frame_equal(result, expected)
30+
31+
# for parsing, interpret object as str
32+
result = self.read_csv(path, dtype=object, index_col=0)
33+
tm.assert_frame_equal(result, expected)
3534

3635
# we expect all object columns, so need to
3736
# convert to test for equivalence

0 commit comments

Comments
 (0)