12
12
import pandas .core .nanops as nanops
13
13
from pandas .compat import zip
14
14
from pandas .tseries .timedeltas import to_timedelta
15
- from pandas . types . common import ( needs_i8_conversion )
15
+ from pandas import to_datetime
16
16
import numpy as np
17
+ from pandas .types .common import (is_datetime64_dtype , is_timedelta64_dtype )
17
18
18
19
19
20
def cut (x , bins , right = True , labels = None , retbins = False , precision = 3 ,
@@ -83,11 +84,15 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
83
84
"""
84
85
# NOTE: this binning code is changed a bit from histogram for var(x) == 0
85
86
# for handling the cut for datetime and timedelta objects
86
- if needs_i8_conversion (x ):
87
- x = x .values .view ('i8' )
88
- time_data = True
89
- else :
90
- time_data = False
87
+
88
+ dtype = None
89
+ if is_timedelta64_dtype (x ):
90
+ x = x .astype (np .int64 )
91
+ dtype = np .timedelta64
92
+
93
+ if is_datetime64_dtype (x ):
94
+ x = x .astype (np .int64 )
95
+ dtype = np .datetime64
91
96
92
97
if not np .iterable (bins ):
93
98
if is_scalar (bins ) and bins < 1 :
@@ -124,7 +129,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
124
129
125
130
return _bins_to_cuts (x , bins , right = right , labels = labels ,
126
131
retbins = retbins , precision = precision ,
127
- include_lowest = include_lowest , time_data = time_data )
132
+ include_lowest = include_lowest , dtype = dtype )
128
133
129
134
130
135
def qcut (x , q , labels = None , retbins = False , precision = 3 ):
@@ -185,7 +190,7 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
185
190
186
191
def _bins_to_cuts (x , bins , right = True , labels = None , retbins = False ,
187
192
precision = 3 , name = None , include_lowest = False ,
188
- time_data = False ):
193
+ dtype = None ):
189
194
x_is_series = isinstance (x , Series )
190
195
series_index = None
191
196
@@ -215,7 +220,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
215
220
try :
216
221
levels = _format_levels (bins , precision , right = right ,
217
222
include_lowest = include_lowest ,
218
- time_data = time_data )
223
+ dtype = dtype )
219
224
except ValueError :
220
225
increases += 1
221
226
precision += 1
@@ -249,7 +254,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
249
254
250
255
251
256
def _format_levels (bins , prec , right = True ,
252
- include_lowest = False , time_data = False ):
257
+ include_lowest = False , dtype = None ):
253
258
fmt = lambda v : _format_label (v , precision = prec )
254
259
if right :
255
260
levels = []
@@ -259,7 +264,10 @@ def _format_levels(bins, prec, right=True,
259
264
if a != b and fa == fb :
260
265
raise ValueError ('precision too low' )
261
266
262
- if time_data :
267
+ if dtype == np .datetime64 :
268
+ formatted = '(%s, %s]' % (to_datetime (float (fa ), unit = 'ns' ),
269
+ to_datetime (float (fb ), unit = 'ns' ))
270
+ elif dtype == np .timedelta64 :
263
271
formatted = '(%s, %s]' % (to_timedelta (float (fa ), unit = 'ns' ),
264
272
to_timedelta (float (fb ), unit = 'ns' ))
265
273
else :
@@ -270,7 +278,11 @@ def _format_levels(bins, prec, right=True,
270
278
if include_lowest :
271
279
levels [0 ] = '[' + levels [0 ][1 :]
272
280
else :
273
- if time_data :
281
+ if dtype == np .datetime64 :
282
+ levels = ['[%s, %s)' % (to_datetime (float (fmt (fa )), unit = 'ns' ),
283
+ to_datetime (float (fmt (b )), unit = 'ns' ))
284
+ for a , b in zip (bins , bins [1 :])]
285
+ elif dtype == np .timedelta64 :
274
286
levels = ['[%s, %s)' % (to_timedelta (float (fmt (fa )), unit = 'ns' ),
275
287
to_timedelta (float (fmt (b )), unit = 'ns' ))
276
288
for a , b in zip (bins , bins [1 :])]
0 commit comments