From cb91df31f866c691430101883d129075e615cf8b Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 3 Jan 2018 23:43:08 -0800 Subject: [PATCH 1/2] BUG: Allow merging on Index vectors This behavior used to work in v0.19.0 and is consistent with the documentation. Closes gh-19038 --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/reshape/merge.py | 8 ++++---- pandas/tests/reshape/merge/test_merge.py | 26 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index d53de30187156..22f9ebd8aab98 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -407,7 +407,7 @@ Reshaping - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) - Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) - +- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index ad2a433b5632b..6b455b6ec2b95 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -814,13 +814,13 @@ def _get_merge_keys(self): join_names = [] right_drop = [] left_drop = [] + left, right = self.left, self.right stacklevel = 5 # Number of stack levels from df.merge + list_types = (np.ndarray, Series, Index) - is_lkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(left) - is_rkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(right) + is_lkey = lambda x: isinstance(x, list_types) and len(x) == len(left) + is_rkey = lambda x: isinstance(x, list_types) and len(x) == len(right) # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A # user could, for example, request 'left_index' and 'left_by'. In a diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 70b84f7a6225b..b9a667499b7a0 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1370,6 +1370,32 @@ def f(): household.join(log_return, how='outer') pytest.raises(NotImplementedError, f) + @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index]) + def test_merge_datetime_index(self, klass): + # see gh-19038 + df = DataFrame([1, 2, 3], + ["2016-01-01", "2017-01-01", "2018-01-01"], + columns=["a"]) + df.index = pd.to_datetime(df.index) + on_vector = df.index.year + + if klass is not None: + on_vector = klass(on_vector) + + expected = DataFrame({"a": [1, 2, 3]}) + + if klass == np.asarray: + # The join key is added for ndarray. + expected["key_1"] = [2016, 2017, 2018] + + result = df.merge(df, on=["a", on_vector], how="inner") + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"a_x": [1, 2, 3], + "a_y": [1, 2, 3]}) + result = df.merge(df, on=[df.index.year], how="inner") + tm.assert_frame_equal(result, expected) + class TestMergeDtypes(object): From 1a553bcb84cce4a8ff52c1c2765d3f266baf18ef Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 4 Jan 2018 22:09:12 -0800 Subject: [PATCH 2/2] ENH: Add is_array_like method Used for abstracting checks in DataFrame.merge, but the function itself can be quite useful. --- pandas/core/dtypes/api.py | 1 + pandas/core/dtypes/inference.py | 33 +++++++++++++++++++++++++++ pandas/core/reshape/merge.py | 8 +++---- pandas/tests/api/test_types.py | 2 +- pandas/tests/dtypes/test_inference.py | 17 ++++++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index a2180ecc4632f..738e1ea9062f6 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -55,6 +55,7 @@ is_dict_like, is_iterator, is_file_like, + is_array_like, is_list_like, is_hashable, is_named_tuple) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 8010a213efaf0..6fed25a0012f2 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -267,6 +267,39 @@ def is_list_like(obj): not isinstance(obj, string_and_binary_types)) +def is_array_like(obj): + """ + Check if the object is array-like. + + For an object to be considered array-like, it must be list-like and + have a `dtype` attribute. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_array_like : bool + Whether `obj` has array-like properties. + + Examples + -------- + >>> is_array_like(np.array([1, 2, 3])) + True + >>> is_array_like(pd.Series(["a", "b"])) + True + >>> is_array_like(pd.Index(["2016-01-01"])) + True + >>> is_array_like([1, 2, 3]) + False + >>> is_array_like(("a", "b")) + False + """ + + return is_list_like(obj) and hasattr(obj, "dtype") + + def is_nested_list_like(obj): """ Check if the object is list-like, and that all of its elements diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6b455b6ec2b95..8ee30bf72d313 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -10,7 +10,7 @@ from pandas.compat import range, lzip, zip, map, filter import pandas.compat as compat -from pandas import (Categorical, Series, DataFrame, +from pandas import (Categorical, DataFrame, Index, MultiIndex, Timedelta) from pandas.core.frame import _merge_doc from pandas.core.dtypes.common import ( @@ -18,6 +18,7 @@ is_datetime64_dtype, needs_i8_conversion, is_int64_dtype, + is_array_like, is_categorical_dtype, is_integer_dtype, is_float_dtype, @@ -817,10 +818,9 @@ def _get_merge_keys(self): left, right = self.left, self.right stacklevel = 5 # Number of stack levels from df.merge - list_types = (np.ndarray, Series, Index) - is_lkey = lambda x: isinstance(x, list_types) and len(x) == len(left) - is_rkey = lambda x: isinstance(x, list_types) and len(x) == len(right) + is_lkey = lambda x: is_array_like(x) and len(x) == len(left) + is_rkey = lambda x: is_array_like(x) and len(x) == len(right) # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A # user could, for example, request 'left_index' and 'left_by'. In a diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 1cbcf3f9109a4..7e6430accc546 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -30,7 +30,7 @@ class TestTypes(Base): 'is_period_dtype', 'is_interval', 'is_interval_dtype', 'is_re', 'is_re_compilable', 'is_dict_like', 'is_iterator', 'is_file_like', - 'is_list_like', 'is_hashable', + 'is_list_like', 'is_hashable', 'is_array_like', 'is_named_tuple', 'pandas_dtype', 'union_categoricals', 'infer_dtype'] deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 33c570a814e7d..b4f5d67530fbd 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -78,6 +78,23 @@ def test_is_list_like_fails(ll): assert not inference.is_list_like(ll) +def test_is_array_like(): + assert inference.is_array_like(Series([])) + assert inference.is_array_like(Series([1, 2])) + assert inference.is_array_like(np.array(["a", "b"])) + assert inference.is_array_like(Index(["2016-01-01"])) + + class DtypeList(list): + dtype = "special" + + assert inference.is_array_like(DtypeList()) + + assert not inference.is_array_like([1, 2, 3]) + assert not inference.is_array_like(tuple()) + assert not inference.is_array_like("foo") + assert not inference.is_array_like(123) + + @pytest.mark.parametrize('inner', [ [], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), Series([]), Series(['a']).str, (x for x in range(5))