Skip to content

read_csv(date_parser=x) fails because datetime.datetime has no dtype #3071

Closed
@eloraburns

Description

@eloraburns
from cStringIO import StringIO
# In-memory stand-in for a CSV log file: one header line followed by one data
# row. Adjacent string literals are concatenated, so each pair of literals
# below forms a single newline-terminated CSV line.
log_file = StringIO(
    # Header line; the first column is posix_timestamp.
    'posix_timestamp,elapsed,sys,user,queries,query_time,rows,'
        'accountid,userid,contactid,level,silo,method\n'
    # Single sample record; its first field is an integer POSIX timestamp.
    '1343103150,0.062353,0,4,6,0.01690,3,'
        '12345,1,-1,3,invoice_InvoiceResource,search\n'
)

import pandas as pd
from datetime import datetime
import gc

def posix_string_to_datetime(posix_string):
    """Return the naive UTC datetime for a POSIX timestamp.

    ``posix_string`` is converted with ``int()`` first, so it must be a
    value ``int()`` accepts (for example the text ``'1343103150'``).
    """
    seconds_since_epoch = int(posix_string)
    return datetime.utcfromtimestamp(seconds_since_epoch)

# This works on pandas 0.9.0, but not on 0.10.1 or github master
# Parse the in-memory CSV, asking pandas to use the first column as the index
# and to convert it to dates with the custom parser.
df = pd.io.parsers.read_csv(
    log_file,
    # index_col is the first column, our posix_timestamp
    index_col=0,
    # Interpret the index column as a date
    parse_dates=0,
    # User-supplied converter for the date column (posix_string_to_datetime)
    date_parser=posix_string_to_datetime)

The crash looks like this (on 0.11.0.dev-6e7b37b, OSX 10.6 if it matters):

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-2-c3fa4840399b> in <module>()
     17     # Interpret the index column as a date
     18     parse_dates=0,
---> 19     date_parser=posix_string_to_datetime)
     20 gc.enable()

/Users/taavi/src/pandas/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze)
    398                     buffer_lines=buffer_lines)
    399 
--> 400         return _read(filepath_or_buffer, kwds)
    401 
    402     parser_f.__name__ = name

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    213         return parser
    214 
--> 215     return parser.read()
    216 
    217 _parser_defaults = {

/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows)
    630             #     self._engine.set_error_bad_lines(False)
    631 
--> 632         ret = self._engine.read(nrows)
    633 
    634         if self.options.get('as_recarray'):

/Users/taavi/src/pandas/pandas/io/parsers.pyc in read(self, nrows)
   1006 
   1007             names, data = self._do_date_conversions(names, data)
-> 1008             index = self._make_index(data, alldata, names)
   1009 
   1010         return index, names, data

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _make_index(self, data, alldata, columns)
    706         elif not self._has_complex_date_col:
    707             index = self._get_simple_index(alldata, columns)
--> 708             index = self._agg_index(index)
    709 
    710         elif self._has_complex_date_col:

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _agg_index(self, index, try_parse_dates)
    789                                                    self.na_values)
    790 
--> 791             arr, _ = self._convert_types(arr, col_na_values)
    792             arrays.append(arr)
    793 

/Users/taavi/src/pandas/pandas/io/parsers.pyc in _convert_types(self, values, na_values, try_num_bool)
    815     def _convert_types(self, values, na_values, try_num_bool=True):
    816         na_count = 0
--> 817         if issubclass(values.dtype.type, (np.number, np.bool_)):
    818             mask = lib.ismember(values, na_values)
    819             na_count = mask.sum()

AttributeError: 'datetime.datetime' object has no attribute 'dtype'

Thanks!

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; IO Data (IO issues that don't fit into a more specific label)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions