Skip to content

Commit 0922296

Browse files
committed
updates
1 parent f433be8 commit 0922296

File tree

3 files changed

+86
-35
lines changed

3 files changed

+86
-35
lines changed

pandas/core/arrays/sparse.py

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,8 @@ def from_spmatrix(cls, data):
683683
"""
684684
Create a SparseArray from a scipy.sparse matrix.
685685
686+
.. versionadded:: 0.25.0
687+
686688
Parameters
687689
----------
688690
data : scipy.sparse.spmatrix
@@ -711,9 +713,14 @@ def from_spmatrix(cls, data):
711713
"'data' must have a single column, not '{}'".format(ncol)
712714
)
713715

716+
# our sparse index classes require that the positions be strictly
717+
# increasing. So we need to sort idx, and reorder arr accordingly.
714718
arr = data.data
715719
idx, _ = data.nonzero()
720+
loc = np.argsort(idx)
721+
arr = arr.take(loc)
716722
idx.sort()
723+
717724
zero = np.array(0, dtype=arr.dtype).item()
718725
dtype = SparseDtype(arr.dtype, zero)
719726
index = IntIndex(length, idx)
@@ -2074,13 +2081,43 @@ def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
20742081
return A, rows, columns
20752082

20762083
def to_dense(self):
2084+
"""
2085+
Convert a Series from sparse values to dense.
2086+
2087+
.. versionadded:: 0.25.0
2088+
2089+
Returns
2090+
-------
2091+
Series
2092+
A Series with the same values, stored as a dense array.
2093+
2094+
Examples
2095+
--------
2096+
>>> series = pd.Series(pd.SparseArray([0, 1, 0]))
2097+
>>> series
2098+
0 0
2099+
1 1
2100+
2 0
2101+
dtype: Sparse[int64, 0]
2102+
2103+
>>> series.sparse.to_dense()
2104+
0 0
2105+
1 1
2106+
2 0
2107+
dtype: int64
2108+
"""
20772109
from pandas import Series
20782110
return Series(self._parent.array.to_dense(),
20792111
index=self._parent.index,
20802112
name=self._parent.name)
20812113

20822114

20832115
class SparseFrameAccessor(BaseAccessor, PandasDelegate):
2116+
"""
2117+
DataFrame accessor for sparse data.
2118+
2119+
.. versionadded:: 0.25.0
2120+
"""
20842121

20852122
def _validate(self, data):
20862123
dtypes = data.dtypes
@@ -2092,6 +2129,8 @@ def from_spmatrix(cls, data, index=None, columns=None):
20922129
"""
20932130
Create a new DataFrame from a scipy sparse matrix.
20942131
2132+
.. versionadded:: 0.25.0
2133+
20952134
Parameters
20962135
----------
20972136
data : scipy.sparse.spmatrix
@@ -2103,6 +2142,8 @@ def from_spmatrix(cls, data, index=None, columns=None):
21032142
Returns
21042143
-------
21052144
DataFrame
2145+
Each column of the DataFrame is stored as a
2146+
:class:`SparseArray`.
21062147
21072148
Examples
21082149
--------
@@ -2127,11 +2168,23 @@ def from_spmatrix(cls, data, index=None, columns=None):
21272168

21282169
def to_dense(self):
21292170
"""
2130-
Convert to dense DataFrame
2171+
Convert a DataFrame with sparse values to dense.
2172+
2173+
.. versionadded:: 0.25.0
21312174
21322175
Returns
21332176
-------
2134-
df : DataFrame
2177+
DataFrame
2178+
A DataFrame with the same values stored as dense arrays.
2179+
2180+
Examples
2181+
--------
2182+
>>> df = pd.DataFrame({"A": pd.SparseArray([0, 1, 0])})
2183+
>>> df.sparse.to_dense()
2184+
A
2185+
0 0
2186+
1 1
2187+
2 0
21352188
"""
21362189
from pandas import DataFrame
21372190

@@ -2142,6 +2195,27 @@ def to_dense(self):
21422195
columns=self._parent.columns)
21432196

21442197
def to_coo(self):
2198+
"""
2199+
Return the contents of the frame as a sparse SciPy COO matrix.
2200+
2201+
.. versionadded:: 0.20.0
2202+
2203+
Returns
2204+
-------
2205+
coo_matrix : scipy.sparse.spmatrix
2206+
If the caller is heterogeneous and contains booleans or objects,
2207+
the result will be of dtype=object. See Notes.
2208+
2209+
Notes
2210+
-----
2211+
The dtype will be the lowest-common-denominator type (implicit
2212+
upcasting); that is to say if the dtypes (even of numeric types)
2213+
are mixed, the one that accommodates all will be chosen.
2214+
2215+
e.g. If the dtypes are float16 and float32, dtype will be upcast to
2216+
float32. By numpy.find_common_type convention, mixing int64 and
2217+
uint64 will result in a float64 dtype.
2218+
"""
21452219
try:
21462220
from scipy.sparse import coo_matrix
21472221
except ImportError:

pandas/core/sparse/frame.py

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -233,28 +233,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
233233

234234
return self._init_dict(sdict, index, columns, dtype)
235235

236+
@Appender(SparseFrameAccessor.to_coo.__doc__)
236237
def to_coo(self):
237-
"""
238-
Return the contents of the frame as a sparse SciPy COO matrix.
239-
240-
.. versionadded:: 0.20.0
241-
242-
Returns
243-
-------
244-
coo_matrix : scipy.sparse.spmatrix
245-
If the caller is heterogeneous and contains booleans or objects,
246-
the result will be of dtype=object. See Notes.
247-
248-
Notes
249-
-----
250-
The dtype will be the lowest-common-denominator type (implicit
251-
upcasting); that is to say if the dtypes (even of numeric types)
252-
are mixed, the one that accommodates all will be chosen.
253-
254-
e.g. If the dtypes are float16 and float32, dtype will be upcast to
255-
float32. By numpy.find_common_type convention, mixing int64 and
256-
and uint64 will result in a float64 dtype.
257-
"""
258238
return SparseFrameAccessor(self).to_coo()
259239

260240
def __array_wrap__(self, result):
@@ -296,16 +276,9 @@ def _unpickle_sparse_frame_compat(self, state):
296276
self._default_fill_value = fv
297277
self._default_kind = kind
298278

279+
@Appender(SparseFrameAccessor.to_dense.__doc__)
299280
def to_dense(self):
300-
"""
301-
Convert to dense DataFrame
302-
303-
Returns
304-
-------
305-
df : DataFrame
306-
"""
307-
data = {k: v.to_dense() for k, v in compat.iteritems(self)}
308-
return DataFrame(data, index=self.index, columns=self.columns)
281+
return SparseFrameAccessor(self).to_dense()
309282

310283
def _apply_columns(self, func):
311284
"""

pandas/tests/arrays/sparse/test_array.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,17 @@ def test_constructor_inferred_fill_value(self, data, fill_value):
173173
assert result == fill_value
174174

175175
@pytest.mark.parametrize('format', ['coo', 'csc', 'csr'])
176-
def test_from_spmatrix(self, format):
176+
@pytest.mark.parametrize('size', [0, 10])
177+
def test_from_spmatrix(self, size, format):
177178
pytest.importorskip('scipy')
178179
import scipy.sparse
179180

180-
mat = scipy.sparse.random(10, 1, density=0.5, format=format)
181+
mat = scipy.sparse.random(size, 1, density=0.5, format=format)
181182
result = SparseArray.from_spmatrix(mat)
182-
tm.assert_numpy_array_equal(mat.data, result.sp_values)
183+
184+
result = np.asarray(result)
185+
expected = mat.toarray().ravel()
186+
tm.assert_numpy_array_equal(result, expected)
183187

184188
def test_from_spmatrix_raises(self):
185189
pytest.importorskip('scipy')

0 commit comments

Comments
 (0)