# pylint: disable-msg=E1101,W0613,W0603
-from pandas.compat import StringIO, long
-from pandas import compat
-import os
+from pandas.compat import long

+from pandas import compat, isnull
from pandas import Series, DataFrame, to_datetime
from pandas.io.common import get_filepath_or_buffer
import pandas.json as _json
loads = _json.loads
dumps = _json.dumps

import numpy as np
from pandas.tslib import iNaT
-import pandas.lib as lib

### interface to/from ###

-def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True):
+
+def to_json(path_or_buf, obj, orient=None, date_format='epoch',
+            double_precision=10, force_ascii=True, date_unit='ms'):

    if isinstance(obj, Series):
-        s = SeriesWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
-                         ensure_ascii=force_ascii).write()
+        s = SeriesWriter(
+            obj, orient=orient, date_format=date_format,
+            double_precision=double_precision, ensure_ascii=force_ascii,
+            date_unit=date_unit).write()
    elif isinstance(obj, DataFrame):
-        s = FrameWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
-                        ensure_ascii=force_ascii).write()
+        s = FrameWriter(
+            obj, orient=orient, date_format=date_format,
+            double_precision=double_precision, ensure_ascii=force_ascii,
+            date_unit=date_unit).write()
    else:
        raise NotImplementedError

    if isinstance(path_or_buf, compat.string_types):
-        with open(path_or_buf,'w') as fh:
+        with open(path_or_buf, 'w') as fh:
            fh.write(s)
    elif path_or_buf is None:
        return s
    else:
        path_or_buf.write(s)

+
class Writer(object):

-    def __init__(self, obj, orient, date_format, double_precision, ensure_ascii):
+    def __init__(self, obj, orient, date_format, double_precision,
+                 ensure_ascii, date_unit):
        self.obj = obj

        if orient is None:
@@ -47,38 +53,23 @@ def __init__(self, obj, orient, date_format, double_precision, ensure_ascii):
        self.date_format = date_format
        self.double_precision = double_precision
        self.ensure_ascii = ensure_ascii
+        self.date_unit = date_unit

        self.is_copy = False
        self._format_axes()
-        self._format_dates()
-
-    def _needs_to_date(self, obj):
-        return obj.dtype == 'datetime64[ns]'
-
-    def _format_dates(self):
-        raise NotImplementedError

    def _format_axes(self):
        raise NotImplementedError

-    def _format_to_date(self, data):
-
-        # iso
-        if self.date_format == 'iso':
-            return data.apply(lambda x: x.isoformat())
-
-        # int64
-        else:
-            return data.astype(np.int64)
-
-    def copy_if_needed(self):
-        """ copy myself if necessary """
-        if not self.is_copy:
-            self.obj = self.obj.copy()
-            self.is_copy = True
-
    def write(self):
-        return dumps(self.obj, orient=self.orient, double_precision=self.double_precision, ensure_ascii=self.ensure_ascii)
+        return dumps(
+            self.obj,
+            orient=self.orient,
+            double_precision=self.double_precision,
+            ensure_ascii=self.ensure_ascii,
+            date_unit=self.date_unit,
+            iso_dates=self.date_format == 'iso')
+

class SeriesWriter(Writer):
    _default_orient = 'index'
@@ -87,17 +78,7 @@ def _format_axes(self):
        if not self.obj.index.is_unique and self.orient == 'index':
            raise ValueError("Series index must be unique for orient="
                             "'%s'" % self.orient)
-        if self._needs_to_date(self.obj.index):
-            self.copy_if_needed()
-            self.obj.index = self._format_to_date(self.obj.index.to_series())

-    def _format_dates(self):
-        if self.obj.dtype == 'datetime64[ns]':
-            self.obj = self._format_to_date(self.obj)
-
-    def _format_bools(self):
-        if self._needs_to_bool(self.obj):
-            self.obj = self._format_to_bool(self.obj)

class FrameWriter(Writer):
    _default_orient = 'columns'
@@ -113,39 +94,10 @@ def _format_axes(self):
            raise ValueError("DataFrame columns must be unique for orient="
                             "'%s'." % self.orient)

-        if self.orient == 'columns':
-            axis = 'index'
-        elif self.orient == 'index':
-            axis = 'columns'
-        else:
-            return
-
-        a = getattr(self.obj,axis)
-        if self._needs_to_date(a):
-            self.copy_if_needed()
-            setattr(self.obj,axis,self._format_to_date(a.to_series()))
-
-    def _format_dates(self):
-        dtypes = self.obj.dtypes
-        if len(dtypes[dtypes == 'datetime64[ns]']):
-
-            # need to create a new object
-            d = {}
-
-            for i, (col, c) in enumerate(self.obj.iteritems()):
-
-                if c.dtype == 'datetime64[ns]':
-                    c = self._format_to_date(c)
-
-                d[i] = c
-
-            d = DataFrame(d,index=self.obj.index)
-            d.columns = self.obj.columns
-            self.obj = d

def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
              convert_axes=True, convert_dates=True, keep_default_dates=True,
-              numpy=False, precise_float=False):
+              numpy=False, precise_float=False, date_unit=None):
    """
    Convert JSON string to pandas object

@@ -176,18 +128,28 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
    values : just the values array

    typ : type of object to recover (series or frame), default 'frame'
-    dtype : if True, infer dtypes, if a dict of column to dtype, then use those,
-        if False, then don't infer dtypes at all, default is True,
-        apply only to the data
-    convert_axes : boolean, try to convert the axes to the proper dtypes, default is True
-    convert_dates : a list of columns to parse for dates; If True, then try to parse datelike columns
-        default is True
-    keep_default_dates : boolean, default True. If parsing dates,
-        then parse the default datelike columns
-    numpy : direct decoding to numpy arrays. default is False.Note that the JSON ordering MUST be the same
-        for each term if numpy=True.
-    precise_float : boolean, default False. Set to enable usage of higher precision (strtod) function
-        when decoding string to double values. Default (False) is to use fast but less precise builtin functionality
+    dtype : boolean or dict, default True
+        If True, infer dtypes, if a dict of column to dtype, then use those,
+        if False, then don't infer dtypes at all, applies only to the data.
+    convert_axes : boolean, default True
+        Try to convert the axes to the proper dtypes.
+    convert_dates : boolean, default True
+        List of columns to parse for dates; If True, then try to parse
+        datelike columns default is True
+    keep_default_dates : boolean, default True.
+        If parsing dates, then parse the default datelike columns
+    numpy : boolean, default False
+        Direct decoding to numpy arrays. Note that the JSON ordering MUST be
+        the same for each term if numpy=True.
+    precise_float : boolean, default False.
+        Set to enable usage of higher precision (strtod) function when
+        decoding string to double values. Default (False) is to use fast but
+        less precise builtin functionality
+    date_unit : string, default None
+        The timestamp unit to detect if converting dates. The default behaviour
+        is to try and detect the correct precision, but if this is not desired
+        then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds,
+        milliseconds, microseconds or nanoseconds respectively.

    Returns
    -------
@@ -208,20 +170,28 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,

    obj = None
    if typ == 'frame':
-        obj = FrameParser(json, orient, dtype, convert_axes, convert_dates, keep_default_dates, numpy).parse()
+        obj = FrameParser(json, orient, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit).parse()

    if typ == 'series' or obj is None:
        if not isinstance(dtype,bool):
            dtype = dict(data=dtype)
-        obj = SeriesParser(json, orient, dtype, convert_axes, convert_dates, keep_default_dates, numpy).parse()
+        obj = SeriesParser(json, orient, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit).parse()

    return obj

+
class Parser(object):

+    _STAMP_UNITS = ('s', 'ms', 'us', 'ns')
+    _MIN_STAMPS = {
+        's': long(31536000),
+        'ms': long(31536000000),
+        'us': long(31536000000000),
+        'ns': long(31536000000000000)}
+
    def __init__(self, json, orient, dtype=True, convert_axes=True,
                 convert_dates=True, keep_default_dates=False, numpy=False,
-                 precise_float=False):
+                 precise_float=False, date_unit=None):
        self.json = json

        if orient is None:
@@ -233,10 +203,20 @@ def __init__(self, json, orient, dtype=True, convert_axes=True,
        if orient == "split":
            numpy = False

+        if date_unit is not None:
+            date_unit = date_unit.lower()
+            if date_unit not in self._STAMP_UNITS:
+                raise ValueError('date_unit must be one of %s' %
+                                 (self._STAMP_UNITS,))
+            self.min_stamp = self._MIN_STAMPS[date_unit]
+        else:
+            self.min_stamp = self._MIN_STAMPS['s']
+
        self.numpy = numpy
        self.precise_float = precise_float
-        self.convert_axes = convert_axes
+        self.convert_axes = convert_axes
        self.convert_dates = convert_dates
+        self.date_unit = date_unit
        self.keep_default_dates = keep_default_dates
        self.obj = None

@@ -356,21 +336,23 @@ def _try_convert_to_date(self, data):


        # ignore numbers that are out of range
-        if issubclass(new_data.dtype.type,np.number):
-            if not ((new_data == iNaT) | (new_data > long(31536000000000000))).all():
+        if issubclass(new_data.dtype.type, np.number):
+            in_range = (isnull(new_data.values) | (new_data > self.min_stamp) |
+                        (new_data.values == iNaT))
+            if not in_range.all():
                return data, False

-        try:
-            new_data = to_datetime(new_data)
-        except:
+        date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
+        for date_unit in date_units:
            try:
-                new_data = to_datetime(new_data.astype('int64'))
+                new_data = to_datetime(new_data, errors='raise',
+                                       unit=date_unit)
+            except OverflowError:
+                continue
            except:
-
-                # return old, noting more we can do
-                return data, False
-
-        return new_data, True
+                break
+            return new_data, True
+        return data, False

    def _try_convert_dates(self):
        raise NotImplementedError
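
For reference, a minimal round-trip sketch of the new date_unit keyword, assuming the patch above is applied; the frame contents and column name are illustrative only. The _MIN_STAMPS thresholds used by the parser are one year's worth of each unit since the epoch (31536000 = 365 * 24 * 3600 seconds), so any numeric value smaller than that is rejected as a date candidate.

# Illustrative only: exercises to_json/read_json as defined in this module
# with the new date_unit keyword; the 'when' column is made up.
import pandas as pd
from pandas.io.json import to_json, read_json

df = pd.DataFrame({'when': pd.to_datetime(['2013-01-01', '2013-06-30'])})

# Serialize epoch timestamps in microseconds instead of the default 'ms'.
s = to_json(None, df, date_format='epoch', date_unit='us')

# Passing date_unit forces read_json to parse with that unit only; leaving it
# as None makes the Parser try 's', 'ms', 'us' and 'ns' in turn.
roundtripped = read_json(s, convert_dates=['when'], date_unit='us')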