From f2c4943034ae271f6d5fbc435244189460c915c1 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Sun, 2 Dec 2018 12:39:53 +0100
Subject: [PATCH 1/2] BUG: all-na corner case for str.cat

---
 doc/source/whatsnew/v0.24.0.rst |  1 +
 pandas/core/strings.py          |  6 ++++--
 pandas/tests/test_strings.py    | 29 ++++++++++++++++++++++++-----
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index fc505128a2e20..57930a3cd25b2 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1364,6 +1364,7 @@ Strings
 - Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`).
 - Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`).
 - Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`)
+- Bug in :meth:`Index.str.cat` when the result contained only NaN (:issue:`24044`)
 
 Interval
 ^^^^^^^^
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 0b791f6f91aa3..995700e79cb50 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -2260,9 +2260,11 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
             result = cat_core(all_cols, sep)
 
         if isinstance(self._orig, Index):
-            result = Index(result, name=self._orig.name)
+            # add dtype for case that result is all-NA
+            result = Index(result, dtype=object, name=self._orig.name)
         else:  # Series
-            result = Series(result, index=data.index, name=self._orig.name)
+            result = Series(result, dtype=object, index=data.index,
+                            name=self._orig.name)
         return result
 
     _shared_docs['str_split'] = ("""
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 117984ce89743..ba9d0ae0e2e89 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -630,11 +630,30 @@ def test_str_cat_align_mixed_inputs(self, join):
         with pytest.raises(ValueError, match=rgx):
             s.str.cat([t, z], join=join)
 
-    def test_str_cat_raises(self):
-        # non-strings hiding behind object dtype
-        s = Series([1, 2, 3, 4], dtype='object')
-        with pytest.raises(TypeError, match="unsupported operand type.*"):
-            s.str.cat(s)
+    @pytest.mark.parametrize('box', [Series, Index])
+    @pytest.mark.parametrize('other', [Series, Index])
+    def test_str_cat_all_na(self, box, other):
+        # GH 24044
+
+        # check that all NaNs in caller / target work
+        s = Index(['a', 'b', 'c', 'd'])
+        s = s if box == Index else Series(s, index=s)
+        t = other([np.nan] * 4, dtype='object')
+        # add index of s for alignment
+        t = t if other == Index else Series(t, index=s)
+
+        # all-NA target
+        expected = Index([np.nan] * 4, dtype='object')
+        expected = expected if box == Index else Series(expected,
+                                                        index=s.index)
+        result = s.str.cat(t, join='left')
+        assert_series_or_index_equal(result, expected)
+
+        # all-NA caller (only for Series)
+        if other == Series:
+            expected = Series([np.nan] * 4, dtype='object', index=t.index)
+            result = t.str.cat(s, join='left')
+            tm.assert_series_equal(result, expected)
 
     def test_str_cat_special_cases(self):
         s = Series(['a', 'b', 'c', 'd'])

From 474ddb298a62b62debacc8582e0ea25e56bfcefa Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Sun, 2 Dec 2018 19:33:32 +0100
Subject: [PATCH 2/2] Review (jreback)

---
 doc/source/whatsnew/v0.24.0.rst |  2 +-
 pandas/tests/test_strings.py    | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 5f8742620229a..3cec48be9b7ef 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1365,7 +1365,7 @@ Strings
 - Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`).
 - Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`).
 - Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`)
-- Bug in :meth:`Index.str.cat` when the result contained only NaN (:issue:`24044`)
+- Bug in :meth:`Index.str.cat` when the result contained only ``NaN`` (:issue:`24044`)
 
 Interval
 ^^^^^^^^
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index ba9d0ae0e2e89..ced8d37678b8b 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -638,20 +638,21 @@ def test_str_cat_all_na(self, box, other):
         # check that all NaNs in caller / target work
         s = Index(['a', 'b', 'c', 'd'])
         s = s if box == Index else Series(s, index=s)
-        t = other([np.nan] * 4, dtype='object')
+        t = other([np.nan] * 4, dtype=object)
         # add index of s for alignment
         t = t if other == Index else Series(t, index=s)
 
         # all-NA target
-        expected = Index([np.nan] * 4, dtype='object')
-        expected = expected if box == Index else Series(expected,
-                                                        index=s.index)
+        if box == Series:
+            expected = Series([np.nan] * 4, index=s.index, dtype=object)
+        else:  # box == Index
+            expected = Index([np.nan] * 4, dtype=object)
         result = s.str.cat(t, join='left')
         assert_series_or_index_equal(result, expected)
 
         # all-NA caller (only for Series)
         if other == Series:
-            expected = Series([np.nan] * 4, dtype='object', index=t.index)
+            expected = Series([np.nan] * 4, dtype=object, index=t.index)
             result = t.str.cat(s, join='left')
             tm.assert_series_equal(result, expected)
 