26
26
_default_index , _is_sequence )
27
27
from pandas .core .generic import NDFrame
28
28
from pandas .core .index import Index , MultiIndex , _ensure_index
29
- from pandas .core .indexing import _NDFrameIndexer , _maybe_droplevels
29
+ from pandas .core .indexing import (_NDFrameIndexer , _maybe_droplevels ,
30
+ _is_index_slice , _check_bool_indexer )
30
31
from pandas .core .internals import BlockManager , make_block , form_blocks
31
32
from pandas .core .series import Series , _radd_compat , _dtype_from_scalar
32
33
from pandas .compat .scipy import scoreatpercentile as _quantile
@@ -313,7 +314,7 @@ def f(self, other):
313
314
return self ._combine_series_infer (other , func )
314
315
else :
315
316
316
- # straight boolean comparisions we want to allow all columns
317
+ # straight boolean comparisons we want to allow all columns
317
318
# (regardless of dtype to pass thru)
318
319
return self ._combine_const (other , func , raise_on_error = False ).fillna (True ).astype (bool )
319
320
@@ -1972,72 +1973,52 @@ def iget_value(self, i, j):
1972
1973
return self .get_value (row , col )
1973
1974
1974
1975
def __getitem__(self, key):
    """
    Dispatch ``self[key]`` to the helper that matches the type of ``key``.

    The checks run in this order:

    * ``slice``                  -> ``_getitem_slice`` (row slicing)
    * ``np.ndarray`` or ``list`` -> ``_getitem_array`` (boolean or fancy
      index)
    * ``DataFrame``              -> ``_getitem_frame``
    * anything else, when ``self.columns`` is a ``MultiIndex``
                                 -> ``_getitem_multilevel``
    * anything else              -> ``_get_item_cache`` (single column)

    The DataFrame test comes before the MultiIndex test, so a DataFrame
    key is routed to ``_getitem_frame`` whatever the column index type.
    """
    if isinstance(key, slice):
        # slice rows
        return self._getitem_slice(key)

    if isinstance(key, (np.ndarray, list)):
        # either boolean or fancy integer index
        return self._getitem_array(key)

    if isinstance(key, DataFrame):
        return self._getitem_frame(key)

    if isinstance(self.columns, MultiIndex):
        return self._getitem_multilevel(key)

    # get column
    return self._get_item_cache(key)
2005
1989
1990
def _getitem_slice(self, key):
    """
    Return the rows selected by the slice ``key``.

    ``key`` is handed to ``self._slice`` unchanged when the index's
    ``inferred_type`` is ``'integer'``, or when ``_is_index_slice(key)``
    is true for an index that is not ``'floating'``.  In every other case
    (including every ``'floating'`` index) it is first translated with
    ``self.ix._convert_to_indexer(key, axis=0)``.
    """
    idx_type = self.index.inferred_type

    # A 'floating' index always goes through the converter; the test on
    # idx_type comes first so _is_index_slice is never consulted for it.
    use_key_directly = idx_type != 'floating' and (
        idx_type == 'integer' or _is_index_slice(key))

    if use_key_directly:
        indexer = key
    else:
        indexer = self.ix._convert_to_indexer(key, axis=0)
    return self._slice(indexer, axis=0)
1999
+
2006
2000
def _getitem_array(self, key):
    """
    Index with an array-like ``key``.

    When ``com._is_bool_indexer(key)`` is true the key selects rows:

    * a ``Series`` key whose index does not equal ``self.index`` triggers
      a ``UserWarning`` announcing that it will be reindexed;
    * any other key must have ``len(key) == len(self.index)``, otherwise
      ``ValueError`` is raised;
    * the key is then passed through ``_check_bool_indexer`` and the rows
      at its non-zero positions are returned via ``self.take``.

    Otherwise the key selects columns: it is converted with
    ``self.ix._convert_to_indexer(key, axis=1)`` and handed to
    ``self.take(..., axis=1)``.
    """
    # also raises Exception if object array with NA values
    if com._is_bool_indexer(key):
        # warning here just in case -- previously __setitem__ was
        # reindexing but __getitem__ was not; it seems more reasonable to
        # go with the __setitem__ behavior since that is more consistent
        # with all other indexing behavior
        if isinstance(key, Series) and not key.index.equals(self.index):
            import warnings
            warnings.warn("Boolean Series key will be reindexed to match "
                          "DataFrame index.", UserWarning)
        elif len(key) != len(self.index):
            raise ValueError('Item wrong length %d instead of %d!' %
                             (len(key), len(self.index)))

        # _check_bool_indexer will throw exception if Series key cannot
        # be reindexed to match DataFrame rows
        key = _check_bool_indexer(self.index, key)
        indexer = key.nonzero()[0]
        return self.take(indexer, axis=0)

    indexer = self.ix._convert_to_indexer(key, axis=1)
    return self.take(indexer, axis=1)
2041
2022
2042
2023
def _getitem_multilevel (self , key ):
2043
2024
loc = self .columns .get_loc (key )
@@ -2063,6 +2044,20 @@ def _getitem_multilevel(self, key):
2063
2044
else :
2064
2045
return self ._get_item_cache (key )
2065
2046
2047
def _getitem_frame(self, key):
    """
    Index with a DataFrame ``key`` by delegating to ``self.where(key)``.

    Raises
    ------
    ValueError
        If ``key.values.dtype`` is not ``np.bool_``.
    """
    if key.values.dtype != np.bool_:
        raise ValueError('Must pass DataFrame with boolean values only')
    return self.where(key)
2051
+
2052
def _slice(self, slobj, axis=0):
    """
    Build a new object from a slice of the underlying ``self._data``.

    Parameters
    ----------
    slobj : indexer forwarded unchanged to ``self._data.get_slice``
    axis : frame axis to slice along; ``0`` is forwarded as manager axis
        ``1`` and any other value as manager axis ``0``

    Returns
    -------
    The result of ``self._constructor`` applied to the sliced data.
    """
    mgr_axis = 1 if axis == 0 else 0
    new_data = self._data.get_slice(slobj, axis=mgr_axis)
    return self._constructor(new_data)
2060
+
2066
2061
def _box_item_values (self , key , values ):
2067
2062
items = self .columns [self .columns .get_loc (key )]
2068
2063
if values .ndim == 2 :
@@ -2096,34 +2091,56 @@ def __setattr__(self, name, value):
2096
2091
object .__setattr__ (self , name , value )
2097
2092
2098
2093
def __setitem__(self, key, value):
    """
    Dispatch ``self[key] = value`` to the helper matching ``key``'s type.

    * ``slice``                  -> ``_setitem_slice`` (row slicing)
    * ``np.ndarray`` or ``list`` -> ``_setitem_array``
    * ``DataFrame``              -> ``_setitem_frame``
    * anything else              -> ``_set_item`` (single column)

    Nothing is returned; every helper is called for its side effect.
    """
    if isinstance(key, slice):
        # slice rows
        self._setitem_slice(key, value)
    elif isinstance(key, (np.ndarray, list)):
        self._setitem_array(key, value)
    elif isinstance(key, DataFrame):
        self._setitem_frame(key, value)
    else:
        # set column
        self._set_item(key, value)
2108
2104
2109
- def _boolean_set (self , key , value ):
2110
- if key .values .dtype != np .bool_ :
2111
- raise ValueError ('Must pass DataFrame with boolean values only' )
2112
- self .where (- key , value , inplace = True )
2105
def _setitem_slice(self, key, value):
    """
    Assign ``value`` to the rows selected by the slice ``key``.

    ``key`` is used unchanged as the indexer when the index's
    ``inferred_type`` is ``'integer'``, or when ``_is_index_slice(key)``
    is true for an index that is not ``'floating'``.  In every other case
    (including every ``'floating'`` index) it is first translated with
    ``self.ix._convert_to_indexer(key, axis=0)``.  The assignment itself
    is done by ``self.ix._setitem_with_indexer``.
    """
    idx_type = self.index.inferred_type

    # A 'floating' index always goes through the converter; the test on
    # idx_type comes first so _is_index_slice is never consulted for it.
    use_key_directly = idx_type != 'floating' and (
        idx_type == 'integer' or _is_index_slice(key))

    if use_key_directly:
        indexer = key
    else:
        indexer = self.ix._convert_to_indexer(key, axis=0)
    self.ix._setitem_with_indexer(indexer, value)
2113
2114
2114
- def _set_item_multiple (self , keys , value ):
2115
- if isinstance (value , DataFrame ):
2116
- if len (value .columns ) != len (keys ):
2117
- raise AssertionError ('Columns must be same length as keys' )
2118
- for k1 , k2 in zip (keys , value .columns ):
2119
- self [k1 ] = value [k2 ]
2115
def _setitem_array(self, key, value):
    """
    Assign ``value`` using an array-like ``key``.

    When ``com._is_bool_indexer(key)`` is true the key selects rows: its
    length must equal ``len(self.index)`` (else ``ValueError``), it is
    passed through ``_check_bool_indexer`` and ``value`` is written to the
    rows at its non-zero positions.

    Otherwise the key names columns:

    * a ``DataFrame`` value must have as many columns as ``key`` has
      entries (else ``AssertionError``); its columns are paired
      positionally with the entries of ``key`` and assigned one by one;
    * any other value is written to every row of the columns located by
      ``self.ix._convert_to_indexer(key, axis=1)``.
    """
    # also raises Exception if object array with NA values
    if com._is_bool_indexer(key):
        if len(key) != len(self.index):
            raise ValueError('Item wrong length %d instead of %d!' %
                             (len(key), len(self.index)))
        key = _check_bool_indexer(self.index, key)
        indexer = key.nonzero()[0]
        self.ix._setitem_with_indexer(indexer, value)
        return

    if isinstance(value, DataFrame):
        if len(value.columns) != len(key):
            raise AssertionError('Columns must be same length as key')
        # goes back through __setitem__ once per target column
        for k1, k2 in zip(key, value.columns):
            self[k1] = value[k2]
    else:
        indexer = self.ix._convert_to_indexer(key, axis=1)
        # slice(None) selects all rows; only the columns are restricted
        self.ix._setitem_with_indexer((slice(None), indexer), value)
2133
+
2134
def _setitem_frame(self, key, value):
    """
    Boolean setting with a DataFrame key, e.g. ``df[df > df2] = 0``.

    Delegates to ``self.where(-key, value, inplace=True)``.

    Raises
    ------
    ValueError
        If ``key.values.dtype`` is not ``np.bool_``, or if
        ``self._is_mixed_type`` is true.
    """
    if key.values.dtype != np.bool_:
        raise ValueError('Must pass DataFrame with boolean values only')

    if self._is_mixed_type:
        raise ValueError('Cannot do boolean setting on mixed-type frame')

    # NOTE(review): the key is negated before the call; where()'s inplace
    # branch appears to invert its condition again -- confirm against
    # where() before changing either side.
    self.where(-key, value, inplace=True)
2127
2144
2128
2145
def _set_item (self , key , value ):
2129
2146
"""
@@ -2918,7 +2935,7 @@ def take(self, indices, axis=0):
2918
2935
"""
2919
2936
if isinstance (indices , list ):
2920
2937
indices = np .array (indices )
2921
- if self ._data . is_mixed_dtype () :
2938
+ if self ._is_mixed_type :
2922
2939
if axis == 0 :
2923
2940
new_data = self ._data .take (indices , axis = 1 )
2924
2941
return DataFrame (new_data )
@@ -3247,7 +3264,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
3247
3264
3248
3265
new_axis , indexer = the_axis .sortlevel (level , ascending = ascending )
3249
3266
3250
- if self ._data . is_mixed_dtype () and not inplace :
3267
+ if self ._is_mixed_type and not inplace :
3251
3268
if axis == 0 :
3252
3269
return self .reindex (index = new_axis )
3253
3270
else :
@@ -3472,7 +3489,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
3472
3489
'in length. Expecting %d got %d ' %
3473
3490
(len (to_replace ), len (value )))
3474
3491
3475
- new_data = self ._data .replace_list (to_replace , value ,
3492
+ new_data = self ._data .replace_list (to_replace , value ,
3476
3493
inplace = inplace )
3477
3494
3478
3495
else : # [NA, ''] -> 0
@@ -5055,7 +5072,7 @@ def clip(self, lower=None, upper=None):
5055
5072
# GH 2747 (arguments were reversed)
5056
5073
if lower is not None and upper is not None :
5057
5074
lower , upper = min (lower ,upper ), max (lower ,upper )
5058
-
5075
+
5059
5076
return self .apply (lambda x : x .clip (lower = lower , upper = upper ))
5060
5077
5061
5078
def clip_upper (self , threshold ):
@@ -5246,25 +5263,22 @@ def where(self, cond, other=NA, inplace=False, try_cast=False, raise_on_error=Tr
5246
5263
-------
5247
5264
wh : DataFrame
5248
5265
"""
5249
- if not hasattr (cond , 'shape' ):
5250
- raise ValueError ('where requires an ndarray like object for its '
5251
- 'condition' )
5252
-
5253
- if isinstance (cond , np .ndarray ):
5266
+ if isinstance (cond , DataFrame ):
5267
+ # this already checks for index/column equality
5268
+ cond = cond .reindex (self .index , columns = self .columns )
5269
+ else :
5270
+ if not hasattr (cond , 'shape' ):
5271
+ raise ValueError ('where requires an ndarray like object for its '
5272
+ 'condition' )
5254
5273
if cond .shape != self .shape :
5255
5274
raise ValueError ('Array conditional must be same shape as self' )
5256
5275
cond = self ._constructor (cond , index = self .index ,
5257
5276
columns = self .columns )
5258
5277
5259
- if cond .shape != self .shape :
5260
- cond = cond .reindex (self .index , columns = self .columns )
5261
-
5262
- if inplace :
5263
- cond = - (cond .fillna (True ).astype (bool ))
5264
- else :
5265
- cond = cond .fillna (False ).astype (bool )
5266
- elif inplace :
5267
- cond = - cond
5278
+ if inplace :
5279
+ cond = - (cond .fillna (True ).astype (bool ))
5280
+ else :
5281
+ cond = cond .fillna (False ).astype (bool )
5268
5282
5269
5283
if isinstance (other , DataFrame ):
5270
5284
_ , other = self .align (other , join = 'left' , fill_value = NA )
0 commit comments