Skip to content

concat erroneously sets series to NaN #9765

Closed
@RasterBurn

Description

@RasterBurn

problem

`pd.concat` returns wrong data (an entire column comes back as NaN) when:

  1. One of the inputs is a SparseDataFrame
  2. AND that SparseDataFrame has a column whose values are all 0.0, with a fill_value of 0.0

If the SparseDataFrame has no all-0.0 column, the concat produces the correct result.

import unittest
import pandas as pd
from pandas.util.testing import assert_frame_equal
import sys

class TestSparseConcat(unittest.TestCase):
    """Reproduction case: column-wise concat of a dense frame with a sparse one.

    Both tests build the same two inputs (column "A", and columns "B"/"C")
    and compare ``pd.concat([A, B], axis=1)`` against a dense frame holding
    all three columns. The only difference between the tests is whether the
    second input is converted with ``to_sparse(fill_value=0.0)`` first.
    """

    # Fixture: raw column data plus the dense frame every test compares against.
    def setUp(self):
        self.orig = {"A": [1.0, 0.0, 1.0, 0.0]}
        self.to_add = {
            # "B" is entirely 0.0, i.e. every value equals the fill_value
            # used by the sparse test below.
            "B": [0.0, 0.0, 0.0, 0.0],
            "C": [0.0, 0.0, 1.0, 0.0]
        }
        # Copy before updating so self.orig keeps only column "A".
        merged = self.orig.copy()
        merged.update(self.to_add)
        self.expected = pd.DataFrame(data=merged)

    # Baseline: both inputs are ordinary (dense) DataFrames.
    def test_concat_df_to_df(self):
        A = pd.DataFrame(data=self.orig)
        B = pd.DataFrame(data=self.to_add)
        C = pd.concat([A, B], axis=1)
        assert_frame_equal(C, self.expected)  # this works

    # Same inputs, but the second frame is made sparse with fill_value=0.0
    # before concatenating.
    def test_concat_sparse_to_df(self):
        A = pd.DataFrame(data=self.orig)
        B = pd.DataFrame(data=self.to_add).to_sparse(fill_value=0.0)
        C = pd.concat([A, B], axis=1)
        # Dump both frames to stderr so the mismatch is visible in the test log.
        sys.stderr.write("\nExpected:\n{}\nGot:\n{}\n".format(self.expected, C))
        assert_frame_equal(C, self.expected)  # this DOESN'T work

# Run the test cases above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

output

.
Expected:
   A  B  C
0  1  0  0
1  0  0  0
2  1  0  1
3  0  0  0
Got:
   A   B  C
0  1 NaN  0
1  0 NaN  0
2  1 NaN  1
3  0 NaN  0
F
======================================================================
FAIL: test_concat_sparse_to_df (__main__.TestSparseConcat)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "example.py", line 28, in test_concat_sparse_to_df
    assert_frame_equal(C, self.expected)
  File "/home/vagrant/.virtualenvs/ai-modeling/lib/python2.7/site-packages/pandas/util/testing.py", line 748, in assert_frame_equal
    check_exact=check_exact)
  File "/home/vagrant/.virtualenvs/ai-modeling/lib/python2.7/site-packages/pandas/util/testing.py", line 692, in assert_series_equal
    assert_almost_equal(left.values, right.values, check_less_precise)
  File "das/src/testing.pyx", line 58, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2758)
  File "das/src/testing.pyx", line 93, in pandas._testing.assert_almost_equal (pandas/src/testing.c:1843)
  File "das/src/testing.pyx", line 102, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2010)
AssertionError: First object is null, second isn't: nan != 0.0

----------------------------------------------------------------------
Ran 2 tests in 0.024s

FAILED (failures=1)

metadata


INSTALLED VERSIONS
------------------
commit: None
python: 2.7.6.final.0
python-bits: 64
OS: Linux
OS-release: 2.6.32-300.3.1.el6uek.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8

pandas: 0.16.0
nose: 1.3.3
Cython: None
numpy: 1.9.2
scipy: 0.15.1
statsmodels: None
IPython: 3.0.0
sphinx: 1.2
patsy: None
dateutil: 2.2
pytz: 2014.4
bottleneck: None
tables: None
numexpr: None
matplotlib: 1.2.1
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.3.2
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Reshaping (Concat, Merge/Join, Stack/Unstack, Explode); Sparse (Sparse Data Type)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions