Closed
Description
problem
`pd.concat` fails (the affected column comes back as all NaN instead of 0.0) when:
- Using a SparseDataFrame
- AND that SparseDataFrame has a column of all `0.0` and a `fill_value` of `0.0`
If that SparseDataFrame doesn't have an all-`0.0` column, it works beautifully.
import unittest
import pandas as pd
from pandas.util.testing import assert_frame_equal
import sys
class TestSparseConcat(unittest.TestCase):
def setUp(self):
self.orig = {"A": [1.0, 0.0, 1.0, 0.0]}
self.to_add = {
"B": [0.0, 0.0, 0.0, 0.0],
"C": [0.0, 0.0, 1.0, 0.0]
}
merged = self.orig.copy()
merged.update(self.to_add)
self.expected = pd.DataFrame(data=merged)
def test_concat_df_to_df(self):
A = pd.DataFrame(data=self.orig)
B = pd.DataFrame(data=self.to_add)
C = pd.concat([A, B], axis=1)
assert_frame_equal(C, self.expected) # this works
def test_concat_sparse_to_df(self):
A = pd.DataFrame(data=self.orig)
B = pd.DataFrame(data=self.to_add).to_sparse(fill_value=0.0)
C = pd.concat([A, B], axis=1)
sys.stderr.write("\nExpected:\n{}\nGot:\n{}\n".format(self.expected, C))
assert_frame_equal(C, self.expected) # this DOESN'T work
# Run the reproduction tests only when executed as a script, not on import.
if __name__ == "__main__":
    unittest.main()
output
.
Expected:
A B C
0 1 0 0
1 0 0 0
2 1 0 1
3 0 0 0
Got:
A B C
0 1 NaN 0
1 0 NaN 0
2 1 NaN 1
3 0 NaN 0
F
======================================================================
FAIL: test_concat_sparse_to_df (__main__.TestSparseConcat)
----------------------------------------------------------------------
Traceback (most recent call last):
File "example.py", line 28, in test_concat_sparse_to_df
assert_frame_equal(C, self.expected)
File "/home/vagrant/.virtualenvs/ai-modeling/lib/python2.7/site-packages/pandas/util/testing.py", line 748, in assert_frame_equal
check_exact=check_exact)
File "/home/vagrant/.virtualenvs/ai-modeling/lib/python2.7/site-packages/pandas/util/testing.py", line 692, in assert_series_equal
assert_almost_equal(left.values, right.values, check_less_precise)
File "das/src/testing.pyx", line 58, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2758)
File "das/src/testing.pyx", line 93, in pandas._testing.assert_almost_equal (pandas/src/testing.c:1843)
File "das/src/testing.pyx", line 102, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2010)
AssertionError: First object is null, second isn't: nan != 0.0
----------------------------------------------------------------------
Ran 2 tests in 0.024s
FAILED (failures=1)
metadata
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.6.final.0
python-bits: 64
OS: Linux
OS-release: 2.6.32-300.3.1.el6uek.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.16.0
nose: 1.3.3
Cython: None
numpy: 1.9.2
scipy: 0.15.1
statsmodels: None
IPython: 3.0.0
sphinx: 1.2
patsy: None
dateutil: 2.2
pytz: 2014.4
bottleneck: None
tables: None
numexpr: None
matplotlib: 1.2.1
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.3.2
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None