diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 759c7fb65374d..4c6a32ff1ba4e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2291,18 +2291,28 @@ def _concat_same_type( ) -> CategoricalT: from pandas.core.dtypes.concat import union_categoricals - result = union_categoricals(to_concat) - - # in case we are concatenating along axis != 0, we need to reshape - # the result from union_categoricals first = to_concat[0] if axis >= first.ndim: - raise ValueError + raise ValueError( + f"axis {axis} is out of bounds for array of dimension {first.ndim}" + ) + if axis == 1: - if not all(len(x) == len(first) for x in to_concat): + # Flatten, concatenate then reshape + if not all(x.ndim == 2 for x in to_concat): raise ValueError - # TODO: Will this get contiguity wrong? - result = result.reshape(-1, len(to_concat), order="F") + + # pass correctly-shaped to union_categoricals + tc_flat = [] + for obj in to_concat: + tc_flat.extend([obj[:, i] for i in range(obj.shape[1])]) + + res_flat = cls._concat_same_type(tc_flat, axis=0) + + result = res_flat.reshape(len(first), -1, order="F") + return result + + result = union_categoricals(to_concat) return result # ------------------------------------------------------------------ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 28f415476d3fd..701f9fd4a9c99 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -107,7 +107,9 @@ def is_nonempty(x) -> bool: to_concat = non_empties kinds = {obj.dtype.kind for obj in to_concat} - contains_datetime = any(kind in ["m", "M"] for kind in kinds) + contains_datetime = any(kind in ["m", "M"] for kind in kinds) or any( + isinstance(obj, ABCExtensionArray) and obj.ndim > 1 for obj in to_concat + ) all_empty = not len(non_empties) single_dtype = len({x.dtype for x in to_concat}) == 1 diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index a749955d35494..f624c56b54001 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -26,3 +26,21 @@ def test_concat_single_dataframe_tz_aware(copy): expected = df.copy() result = pd.concat([df], copy=copy) tm.assert_frame_equal(result, expected) + + +def test_concat_periodarray_2d(): + pi = pd.period_range("2016-01-01", periods=36, freq="D") + arr = pi._data.reshape(6, 6) + + result = _concat.concat_compat([arr[:2], arr[2:]], axis=0) + tm.assert_period_array_equal(result, arr) + + result = _concat.concat_compat([arr[:, :2], arr[:, 2:]], axis=1) + tm.assert_period_array_equal(result, arr) + + msg = "all the input array dimensions for the concatenation axis must match exactly" + with pytest.raises(ValueError, match=msg): + _concat.concat_compat([arr[:, :2], arr[:, 2:]], axis=0) + + with pytest.raises(ValueError, match=msg): + _concat.concat_compat([arr[:2], arr[2:]], axis=1) diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index 9328ddc91f10f..a86c07c604320 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -122,21 +122,23 @@ def test_tolist_2d(self, data): assert result == expected def test_concat_2d(self, data): - left = data.reshape(-1, 1) + left = type(data)._concat_same_type([data, data]).reshape(-1, 2) right = left.copy() # axis=0 result = left._concat_same_type([left, right], axis=0) - expected = data._concat_same_type([data, data]).reshape(-1, 1) + expected = data._concat_same_type([data] * 4).reshape(-1, 2) self.assert_extension_array_equal(result, expected) # axis=1 result = left._concat_same_type([left, right], axis=1) - expected = data.repeat(2).reshape(-1, 2) - self.assert_extension_array_equal(result, expected) + assert result.shape == (len(data), 4) + self.assert_extension_array_equal(result[:, :2], left) + self.assert_extension_array_equal(result[:, 2:], right) # axis > 1 -> invalid - with pytest.raises(ValueError): + msg = "axis 2 is out of bounds for array of dimension 2" + with pytest.raises(ValueError, match=msg): left._concat_same_type([left, right], axis=2) @pytest.mark.parametrize("method", ["backfill", "pad"])