Skip to content

Commit fc9d0a2

Browse files
committed
fixup
1 parent f7353bf commit fc9d0a2

File tree

8 files changed

+67
-156
lines changed

8 files changed

+67
-156
lines changed

pandas/core/indexing.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -530,10 +530,7 @@ def setter(item, v):
530530
# set the item, possibly having a dtype change
531531
s._consolidate_inplace()
532532
s = s.copy()
533-
# if is_sparse(s):
534-
# s.set_value(pi, v, takeable=is_list_like(pi))
535-
# else:
536-
s._data = s._data.setitem(indexer=pi, value=v) # TODO THIS USE THIS
533+
s._data = s._data.setitem(indexer=pi, value=v)
537534
s._maybe_update_cacher(clear=True)
538535

539536
# reset the sliced object if unique
@@ -638,13 +635,8 @@ def can_do_equal_len():
638635

639636
# actually do the set
640637
self.obj._consolidate_inplace()
641-
if is_sparse(self.obj):
642-
# SparseSeries has underlying SparseArray, which doesn't
643-
# support resizing
644-
self.obj[indexer] = value
645-
else:
646-
self.obj._data = self.obj._data.setitem(indexer=indexer,
647-
value=value)
638+
self.obj._data = self.obj._data.setitem(indexer=indexer,
639+
value=value)
648640
self.obj._maybe_update_cacher(clear=True)
649641

650642
def _align_series(self, indexer, ser, multiindex_indexer=False):

pandas/core/internals.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,6 +1741,8 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
17411741
# Cannot modify a SparseArray
17421742
if is_sparse(new_values):
17431743
new_values = new_values.to_dense()
1744+
if is_sparse(mask):
1745+
mask = mask.to_dense()
17441746

17451747
if isinstance(new, np.ndarray) and len(new) == len(mask):
17461748
new = new[mask]
@@ -2761,8 +2763,7 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
27612763
placement=self.mgr_locs)
27622764

27632765
def _can_hold_element(self, element):
2764-
return np.issubdtype(np.asanyarray(element).dtype,
2765-
self.sp_values.dtype)
2766+
return np.can_cast(np.asarray(element).dtype, self.sp_values.dtype)
27662767

27672768
def _try_coerce_result(self, result):
27682769
if np.ndim(result) > 0 and not is_sparse(result):

pandas/core/series.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -947,8 +947,7 @@ def set_value(self, label, value, takeable=False):
947947
948948
Returns
949949
-------
950-
series : Series
951-
self
950+
self : Series
952951
"""
953952
warnings.warn("set_value is deprecated and will be removed "
954953
"in a future release. Please use "

pandas/core/sparse/array.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import pandas.core.algorithms as algos
3737
import pandas.core.ops as ops
3838
import pandas.io.formats.printing as printing
39+
from pandas.errors import PerformanceWarning
3940
from pandas.util._decorators import Appender
4041
from pandas.core.indexes.base import _index_shared_docs
4142

@@ -398,27 +399,39 @@ def set_values(self, indexer, value):
398399
"""
399400
# If indexer is not a single int position, easiest to handle via dense
400401
if not is_scalar(indexer):
402+
warnings.warn(
403+
'Setting SparseSeries/Array values is particularly '
404+
'inefficient when indexing with multiple keys because the '
405+
'whole series is made dense interim.',
406+
PerformanceWarning, stacklevel=2)
407+
401408
values = self.to_dense()
402409
values[indexer] = value
403410
return SparseArray(values, kind=self.kind,
404411
fill_value=self.fill_value)
405412

413+
warnings.warn(
414+
'Setting SparseSeries/Array values is inefficient '
415+
'(a copy of data is made)', PerformanceWarning, stacklevel=2)
416+
406417
# If label already in sparse index, just switch the value on a copy
407418
idx = self.sp_index.lookup(indexer)
408419
if idx != -1:
409420
obj = self.copy()
410421
obj.sp_values[idx] = value
411422
return obj
412423

424+
# Otherwise, construct a new array, and insert the new value in the
425+
# correct position
413426
indices = self.sp_index.to_int_index().indices
414427
pos = np.searchsorted(indices, indexer)
415428

416429
indices = np.insert(indices, pos, indexer)
417430
sp_values = np.insert(self.sp_values, pos, value)
431+
sp_index = _make_index(self.sp_index.length, indices, self.kind)
418432

419-
return SparseArray(
420-
sp_values, fill_value=self.fill_value,
421-
sparse_index=_make_index(self.sp_index.length, indices, self.kind))
433+
return SparseArray(sp_values, sparse_index=sp_index,
434+
fill_value=self.fill_value)
422435

423436
def to_dense(self, fill=None):
424437
"""

pandas/core/sparse/frame.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -465,29 +465,6 @@ def _get_value(self, index, col, takeable=False):
465465
return series._get_value(index, takeable=takeable)
466466
_get_value.__doc__ = get_value.__doc__
467467

468-
def set_value(self, index, col, value, takeable=False):
469-
"""
470-
Put single value at passed column and index
471-
472-
Parameters
473-
----------
474-
index : row label
475-
col : column label
476-
value : scalar value
477-
takeable : interpret the index/col as indexers, default False
478-
479-
Notes
480-
-----
481-
This method *always* returns a new object for backwards compatibility.
482-
483-
Returns
484-
-------
485-
frame : DataFrame
486-
"""
487-
self = self.copy(deep=False)
488-
return super(SparseDataFrame, self).set_value(index, col, value,
489-
takeable=takeable)
490-
491468
def _slice(self, slobj, axis=0, kind=None):
492469
if axis == 0:
493470
new_index = self.index[slobj]

pandas/core/sparse/series.py

Lines changed: 4 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@
1010

1111
from pandas.core.dtypes.missing import isna, notna
1212
from pandas.core.dtypes.common import is_scalar, is_sparse
13-
from pandas.core.common import _values_from_object, _maybe_match_name, \
14-
PerformanceWarning
13+
from pandas.core.common import _values_from_object, _maybe_match_name
1514

1615
from pandas.compat.numpy import function as nv
1716
from pandas.core.index import Index, _ensure_index, InvalidIndexError
18-
from pandas.core.indexing import check_bool_indexer
1917
from pandas.core.series import Series
2018
from pandas.core.frame import DataFrame
2119
from pandas.core.internals import SingleBlockManager
@@ -316,7 +314,7 @@ def __array_wrap__(self, result, context=None):
316314
else:
317315
fill_value = self.fill_value
318316

319-
# Results size unchanged, old sparse index is valid ???
317+
# If result size matches, old sparse index is valid ???
320318
if np.size(result) == self.sp_index.npoints:
321319
sp_index = self.sp_index
322320
else:
@@ -433,22 +431,7 @@ def _get_values(self, indexer):
433431
return self[indexer]
434432

435433
def _set_with_engine(self, key, value):
436-
<<<<<<< HEAD
437434
return self._set_value(key, value)
438-
||||||| parent of e95270f1e... ENH: Allow SparseDataFrame/SparseSeries values assignment
439-
return self.set_value(key, value)
440-
=======
441-
takeable = False
442-
443-
# Sparse doesn't support reshaping so the standard .where() does
444-
# not apply. We short-circuit bool indexers here by treating them as
445-
# regular list of indexes and setting each array/value separately
446-
if com.is_bool_indexer(key):
447-
key = check_bool_indexer(self.index, key).nonzero()[0]
448-
takeable = True
449-
450-
return self.set_value(key, value, takeable=takeable)
451-
>>>>>>> e95270f1e... ENH: Allow SparseDataFrame/SparseSeries values assignment
452435

453436
def abs(self):
454437
"""
@@ -538,7 +521,7 @@ def set_value(self, label, value, takeable=False):
538521
539522
Returns
540523
-------
541-
series : SparseSeries
524+
self : SparseSeries
542525
"""
543526
warnings.warn("set_value is deprecated and will be removed "
544527
"in a future release. Please use "
@@ -547,62 +530,7 @@ def set_value(self, label, value, takeable=False):
547530
return self._set_value(label, value, takeable=takeable)
548531

549532
def _set_value(self, label, value, takeable=False):
550-
try:
551-
loc = self.index.get_loc(label)
552-
except (KeyError, TypeError):
553-
loc = None
554-
555-
warnings.warn(
556-
'Setting SparseSeries values is inefficient '
557-
'(a copy of data is made)', PerformanceWarning, stacklevel=2)
558-
559-
# If label is not unique in index, or it is takeable,
560-
# amend the series by amending its dense copy
561-
if not isinstance(loc, int) or takeable:
562-
warnings.warn(
563-
'Setting SparseSeries values is particularly inefficient when '
564-
'indexing with a non-unique label because the whole series '
565-
'is made dense interim.', PerformanceWarning, stacklevel=2)
566-
values = self.to_dense()
567-
values.set_value(label, value, takeable=takeable)
568-
569-
index = values.index
570-
sp_index = None
571-
values = values.to_sparse(kind=self.kind,
572-
fill_value=self.fill_value)
573-
574-
# If label is unique key and not takeable, then it is more space-
575-
# efficient to not make the whole series dense, rather just its
576-
# defined part
577-
else:
578-
values = self._to_dense(sparse_only=True)
579-
old_index = values.index
580-
values.set_value(label, value, takeable=takeable)
581-
index = self.index
582-
583-
# label was already in sparse index, we can just reuse old index
584-
if label in old_index:
585-
sp_index = self.sp_index
586-
587-
# label might have been at least in .index
588-
else:
589-
# and if not, just add it, then construct both indexes anew
590-
if loc is None:
591-
index = self.index.append(Index((label,)))
592-
loc = len(index) - 1
593-
594-
indices = np.append(
595-
self.sp_index.to_int_index().indices,
596-
np.array(loc, dtype=np.int32))
597-
order = indices.argsort()
598-
values = values.values.take(order)
599-
indices = indices.take(order)
600-
sp_index = _make_index(len(index), indices, self.kind)
601-
602-
values = SparseArray(values, sparse_index=sp_index, kind=self.kind,
603-
fill_value=self.fill_value)
604-
self._data = SingleBlockManager(values, index)
605-
self._index = index
533+
self._data = self._data.copy().setitem(indexer=label, value=value)
606534
return self
607535
_set_value.__doc__ = set_value.__doc__
608536

@@ -642,23 +570,6 @@ def to_dense(self, sparse_only=False):
642570
warnings.warn(("The 'sparse_only' parameter has been deprecated "
643571
"and will be removed in a future version."),
644572
FutureWarning, stacklevel=2)
645-
return self._to_dense(sparse_only=sparse_only)
646-
647-
def _to_dense(self, sparse_only=False):
648-
"""
649-
Convert SparseSeries to a Series.
650-
651-
Parameters
652-
----------
653-
sparse_only: bool, default False
654-
If True, return just the non-sparse values, or the dense version
655-
of `self.values` if False.
656-
657-
Returns
658-
-------
659-
s : Series
660-
"""
661-
if sparse_only:
662573
int_index = self.sp_index.to_int_index()
663574
index = self.index.take(int_index.indices)
664575
return Series(self.sp_values, index=index, name=self.name)

pandas/tests/sparse/test_frame.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,31 +1400,49 @@ def test_numpy_func_call(self):
14001400
getattr(np, func)(self.frame)
14011401

14021402

1403-
@pytest.mark.parametrize('kind', ['integer', 'block'])
1404-
@pytest.mark.parametrize('indexer', [None, 'loc', 'iloc', 'at', 'iat'])
1405-
@pytest.mark.parametrize('key', [0, [0, 1], [True, False], None])
1406-
def test_frame_assignment(kind, indexer, key):
1407-
try_multiple = 'at' not in (indexer or '')
1408-
is_multi_key = np.asarray(key).ndim > 0
1409-
if is_multi_key and not try_multiple:
1410-
return
1411-
if not indexer and not is_multi_key and key is not None: # skip non-multikey with setitem
1412-
return
1413-
if indexer and key is None: # skip df indexer with non-setitem
1414-
return
1415-
1403+
def _test_assignment(kind, indexer, key=None):
14161404
arr = np.array([[1, nan],
14171405
[nan, 1]])
1406+
df = DataFrame(arr, copy=True)
14181407
sdf = SparseDataFrame(arr, default_kind=kind).to_sparse(kind=kind)
14191408

1409+
def get_indexer(df):
1410+
return getattr(df, indexer) if indexer else df
1411+
14201412
if key is None:
14211413
key = pd.isnull(sdf).to_sparse()
14221414

1423-
arr = arr.copy()
1424-
arr[np.asarray(key)] = 2
1425-
res = SparseDataFrame(arr, default_kind=kind).to_sparse(kind=kind)
1415+
get_indexer(sdf)[key] = 2
14261416

1427-
sdf_setitem = getattr(sdf, indexer) if indexer else sdf
1428-
sdf_setitem[key] = 2
1417+
get_indexer(df)[key] = 2
1418+
res = df.to_sparse(kind=kind)
14291419

14301420
tm.assert_sp_frame_equal(sdf, res)
1421+
1422+
1423+
@pytest.fixture(params=['integer', 'block'])
1424+
def spindex_kind(request):
1425+
return request.param
1426+
1427+
1428+
@pytest.mark.parametrize('indexer', ['at', 'iat'])
1429+
@pytest.mark.parametrize('key', [0])
1430+
def test_frame_assignment_at(spindex_kind, indexer, key):
1431+
_test_assignment(spindex_kind, indexer, key)
1432+
1433+
1434+
@pytest.mark.parametrize('indexer', ['loc', 'iloc'])
1435+
@pytest.mark.parametrize('key', [0, [0, 1], [True, False]])
1436+
def test_frame_assignment_loc(spindex_kind, indexer, key):
1437+
_test_assignment(spindex_kind, indexer, key)
1438+
1439+
1440+
@pytest.mark.parametrize('key', [None, [0, 1], [True, False]])
1441+
def test_frame_assignment_setitem(spindex_kind, key):
1442+
_test_assignment(spindex_kind, None, key)
1443+
1444+
1445+
@pytest.mark.parametrize('indexer', ['loc', 'at'])
1446+
@pytest.mark.parametrize('key', [3])
1447+
def test_frame_assignment_extend_index(spindex_kind, indexer, key):
1448+
_test_assignment(spindex_kind, indexer, key)

pandas/tests/sparse/test_series.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,11 +1434,11 @@ def test_constructor_dict_datetime64_index(datetime_type):
14341434
@pytest.mark.parametrize('indexer', [None, 'loc', 'iloc', 'at', 'iat'])
14351435
@pytest.mark.parametrize('key', [0, [0, 1], 2, [2, 3],
14361436
[True, False, False, False],
1437-
[False, False, False, True],])
1437+
[False, False, False, True]])
14381438
def test_series_assignment(kind, indexer, key):
1439-
try_multiple = 'at' not in (indexer or '')
1440-
is_multi_key = np.asarray(key).ndim > 0
1441-
if is_multi_key and not try_multiple:
1439+
is_multikey = np.asarray(key).ndim > 0
1440+
skip_multikey = 'at' in (indexer or '')
1441+
if is_multikey and skip_multikey:
14421442
return
14431443

14441444
arr = np.array([0., 0., nan, nan])

0 commit comments

Comments
 (0)