From ac587775930c172263908ed83be8763e2ef8434e Mon Sep 17 00:00:00 2001 From: cyrusmaher Date: Wed, 17 Jun 2015 23:59:06 -0700 Subject: [PATCH 01/15] Update generic.py Simple fix to allow regex filtering to work for numeric column labels, e.g. df.filter(regex="[12][34]") --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35db4051c60c8..e7ce4c2891114 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1928,7 +1928,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): return self.select(matchf, axis=axis_name) elif regex: matcher = re.compile(regex) - return self.select(lambda x: matcher.search(x) is not None, + return self.select(lambda x: matcher.search(str(x)) is not None, axis=axis_name) else: raise TypeError('Must pass either `items`, `like`, or `regex`') From ac90352e27bcca8bb318614d55c1a476db32b200 Mon Sep 17 00:00:00 2001 From: cyrusmaher Date: Fri, 3 Jul 2015 12:39:29 -0700 Subject: [PATCH 02/15] Add test for regex filter on numeric column names --- pandas/tests/test_frame.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a4abe481cfe81..eb0c98eae2197 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10755,7 +10755,11 @@ def test_filter(self): df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) filtered = df.filter(like='_') self.assertEqual(len(filtered.columns), 2) - + + # regex with ints in column names + df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) + filtered = df.filter(regex='^[0-9]+$') + self.assertEqual(len(filtered.columns), 2) # pass in None with assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter(items=None) From b70b9c1b6f682aeab635f2b36c97e4e6fb1e27cc Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 12:56:52 -0700 Subject: [PATCH 03/15] Add release note --- doc/source/whatsnew/v0.17.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 4a513f3122390..0949215b30ae8 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -26,7 +26,8 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ - +- `regex` argument to DataFrame.filter now handles numeric column names instead of raising an exception. + .. _whatsnew_0170.api: Backwards incompatible API changes From 3a9934db238400fe2ef318beb25c3a6fa76332fe Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 13:04:05 -0700 Subject: [PATCH 04/15] Add second regex test --- pandas/tests/test_frame.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index eb0c98eae2197..3f98511f1dc37 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10741,7 +10741,7 @@ def test_filter(self): idx = self.frame.index[0:4] filtered = self.frame.filter(idx, axis='index') expected = self.frame.reindex(index=idx) - assert_frame_equal(filtered,expected) + assert_frame_equal(filtered, expected) # like fcopy = self.frame.copy() @@ -10757,9 +10757,15 @@ def test_filter(self): self.assertEqual(len(filtered.columns), 2) # regex with ints in column names + # from PR #10384 df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) filtered = df.filter(regex='^[0-9]+$') self.assertEqual(len(filtered.columns), 2) + + expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + self.assert_frame_equal(filtered, expected) + # pass in None with assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter(items=None) From 12d79e72102647e5eb658da9df535cfb01cda47c Mon Sep 17 00:00:00 2001 From: cyrusmaher Date: Wed, 17 Jun 2015 23:59:06 -0700 Subject: [PATCH 05/15] Fix regex filter for numeric columns Simple fix to allow regex filtering to work for numeric column labels, e.g. df.filter(regex="[12][34]") Add test for regex filter on numeric column names Add release note Add second regex test --- doc/source/whatsnew/v0.17.0.txt | 3 ++- pandas/core/generic.py | 2 +- pandas/tests/test_frame.py | 12 +++++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 4a513f3122390..0949215b30ae8 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -26,7 +26,8 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ - +- `regex` argument to DataFrame.filter now handles numeric column names instead of raising an exception. + .. _whatsnew_0170.api: Backwards incompatible API changes diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35db4051c60c8..e7ce4c2891114 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1928,7 +1928,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): return self.select(matchf, axis=axis_name) elif regex: matcher = re.compile(regex) - return self.select(lambda x: matcher.search(x) is not None, + return self.select(lambda x: matcher.search(str(x)) is not None, axis=axis_name) else: raise TypeError('Must pass either `items`, `like`, or `regex`') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a4abe481cfe81..3f98511f1dc37 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10741,7 +10741,7 @@ def test_filter(self): idx = self.frame.index[0:4] filtered = self.frame.filter(idx, axis='index') expected = self.frame.reindex(index=idx) - assert_frame_equal(filtered,expected) + assert_frame_equal(filtered, expected) # like fcopy = self.frame.copy() @@ -10755,6 +10755,16 @@ def test_filter(self): df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) filtered = df.filter(like='_') self.assertEqual(len(filtered.columns), 2) + + # regex with ints in column names + # from PR #10384 + df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) + filtered = df.filter(regex='^[0-9]+$') + self.assertEqual(len(filtered.columns), 2) + + expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + self.assert_frame_equal(filtered, expected) # pass in None with assertRaisesRegexp(TypeError, 'Must pass'): From 009422c680c161b8cdc75c6eda67f1dd13c37afb Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 15:21:54 -0700 Subject: [PATCH 06/15] Update docs, test --- doc/source/whatsnew/v0.17.0.txt | 2 +- pandas/tests/test_frame.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 0949215b30ae8..5585dfde69ac5 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -26,7 +26,7 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ -- `regex` argument to DataFrame.filter now handles numeric column names instead of raising an exception. +- ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). .. _whatsnew_0170.api: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 3f98511f1dc37..fbe3a31e020a0 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10758,12 +10758,13 @@ def test_filter(self): # regex with ints in column names # from PR #10384 - df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) + df = DataFrame(0., index=[0, 1, 2], columns=['A1', 1, 'B', 2, 'C']) + expected = DataFrame(0., index=[0, 1, 2], columns=[1, 2]) filtered = df.filter(regex='^[0-9]+$') - self.assertEqual(len(filtered.columns), 2) + self.assert_frame_equal(filtered, expected) - expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) - filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything self.assert_frame_equal(filtered, expected) # pass in None From b46133f558e5e36b6949e9a80a6280629d644a4d Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 15:35:29 -0700 Subject: [PATCH 07/15] Fix merge conflict --- pandas/tests/test_frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index fbe3a31e020a0..29c055899bbed 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10765,6 +10765,9 @@ def test_filter(self): expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1']) filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + + expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything self.assert_frame_equal(filtered, expected) # pass in None From 5bd0a4bc4e9c5eef2af6318d69d20238f6b35b38 Mon Sep 17 00:00:00 2001 From: cyrusmaher Date: Wed, 17 Jun 2015 23:59:06 -0700 Subject: [PATCH 08/15] # This is a combination of 2 commits. # The first commit's message is: Fix regex filter for numeric columns Simple fix to allow regex filtering to work for numeric column labels, e.g. df.filter(regex="[12][34]") Add test for regex filter on numeric column names Add release note Add second regex test # This is the 2nd commit message: Update generic.py Simple fix to allow regex filtering to work for numeric column labels, e.g. df.filter(regex="[12][34]") --- doc/source/whatsnew/v0.17.0.txt | 3 ++- pandas/core/generic.py | 2 +- pandas/tests/test_frame.py | 12 +++++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 164ab73def894..6ac7a7698b0da 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -26,7 +26,8 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ - +- `regex` argument to DataFrame.filter now handles numeric column names instead of raising an exception. + .. _whatsnew_0170.api: Backwards incompatible API changes diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 112ace3df08e2..480d0c596cbbd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1928,7 +1928,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): return self.select(matchf, axis=axis_name) elif regex: matcher = re.compile(regex) - return self.select(lambda x: matcher.search(x) is not None, + return self.select(lambda x: matcher.search(str(x)) is not None, axis=axis_name) else: raise TypeError('Must pass either `items`, `like`, or `regex`') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 4b1954a3be64e..a67120992c326 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10734,7 +10734,7 @@ def test_filter(self): idx = self.frame.index[0:4] filtered = self.frame.filter(idx, axis='index') expected = self.frame.reindex(index=idx) - assert_frame_equal(filtered,expected) + assert_frame_equal(filtered, expected) # like fcopy = self.frame.copy() @@ -10748,6 +10748,16 @@ def test_filter(self): df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) filtered = df.filter(like='_') self.assertEqual(len(filtered.columns), 2) + + # regex with ints in column names + # from PR #10384 + df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) + filtered = df.filter(regex='^[0-9]+$') + self.assertEqual(len(filtered.columns), 2) + + expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + self.assert_frame_equal(filtered, expected) # pass in None with assertRaisesRegexp(TypeError, 'Must pass'): From 88a8e3e198e79ed99e09b921d7b4a7d37a7cf7aa Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 16:10:58 -0700 Subject: [PATCH 09/15] Fix merge conflict --- pandas/tests/test_frame.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a67120992c326..3f98511f1dc37 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10382,6 +10382,13 @@ def test_apply(self): [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) self.assertRaises(ValueError, df.apply, lambda x: x, 2) + # GH9573 + df = DataFrame({'c0':['A','A','B','B'], 'c1':['C','C','D','D']}) + df = df.apply(lambda ts: ts.astype('category')) + self.assertEqual(df.shape, (4, 2)) + self.assertTrue(isinstance(df['c0'].dtype, com.CategoricalDtype)) + self.assertTrue(isinstance(df['c1'].dtype, com.CategoricalDtype)) + def test_apply_mixed_datetimelike(self): # mixed datetimelike # GH 7778 From 2a9ddd1fbfa5ae7b7b73ef5d9902d7a14893ca2e Mon Sep 17 00:00:00 2001 From: cyrusmaher Date: Wed, 17 Jun 2015 23:59:06 -0700 Subject: [PATCH 10/15] Update generic.py Simple fix to allow regex filtering to work for numeric column labels, e.g. df.filter(regex="[12][34]") --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35db4051c60c8..e7ce4c2891114 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1928,7 +1928,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): return self.select(matchf, axis=axis_name) elif regex: matcher = re.compile(regex) - return self.select(lambda x: matcher.search(x) is not None, + return self.select(lambda x: matcher.search(str(x)) is not None, axis=axis_name) else: raise TypeError('Must pass either `items`, `like`, or `regex`') From 0d3af4ce1f0a902de3e591b271a4c32e9d54b91f Mon Sep 17 00:00:00 2001 From: cyrusmaher Date: Fri, 3 Jul 2015 12:39:29 -0700 Subject: [PATCH 11/15] Add test for regex filter on numeric column names --- pandas/tests/test_frame.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a4abe481cfe81..eb0c98eae2197 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10755,7 +10755,11 @@ def test_filter(self): df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) filtered = df.filter(like='_') self.assertEqual(len(filtered.columns), 2) - + + # regex with ints in column names + df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) + filtered = df.filter(regex='^[0-9]+$') + self.assertEqual(len(filtered.columns), 2) # pass in None with assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter(items=None) From 94626ccd6b3eb1f19f9e63a4530df0b193cc6141 Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 12:56:52 -0700 Subject: [PATCH 12/15] Add release note --- doc/source/whatsnew/v0.17.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 4a513f3122390..0949215b30ae8 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -26,7 +26,8 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ - +- `regex` argument to DataFrame.filter now handles numeric column names instead of raising an exception. + .. _whatsnew_0170.api: Backwards incompatible API changes From 3bb6d058588918583bc44349dfafd89795b27049 Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 13:04:05 -0700 Subject: [PATCH 13/15] Add second regex test --- pandas/tests/test_frame.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index eb0c98eae2197..3f98511f1dc37 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10741,7 +10741,7 @@ def test_filter(self): idx = self.frame.index[0:4] filtered = self.frame.filter(idx, axis='index') expected = self.frame.reindex(index=idx) - assert_frame_equal(filtered,expected) + assert_frame_equal(filtered, expected) # like fcopy = self.frame.copy() @@ -10757,9 +10757,15 @@ def test_filter(self): self.assertEqual(len(filtered.columns), 2) # regex with ints in column names + # from PR #10384 df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) filtered = df.filter(regex='^[0-9]+$') self.assertEqual(len(filtered.columns), 2) + + expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + self.assert_frame_equal(filtered, expected) + # pass in None with assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter(items=None) From 86d523aa462ea8c370f1327107fb55db2ad5b700 Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 15:21:54 -0700 Subject: [PATCH 14/15] Update docs, test --- doc/source/whatsnew/v0.17.0.txt | 2 +- pandas/tests/test_frame.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 0949215b30ae8..5585dfde69ac5 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -26,7 +26,7 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ -- `regex` argument to DataFrame.filter now handles numeric column names instead of raising an exception. +- ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). .. _whatsnew_0170.api: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 3f98511f1dc37..fbe3a31e020a0 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10758,12 +10758,13 @@ def test_filter(self): # regex with ints in column names # from PR #10384 - df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, 'A1', 'B']) + df = DataFrame(0., index=[0, 1, 2], columns=['A1', 1, 'B', 2, 'C']) + expected = DataFrame(0., index=[0, 1, 2], columns=[1, 2]) filtered = df.filter(regex='^[0-9]+$') - self.assertEqual(len(filtered.columns), 2) + self.assert_frame_equal(filtered, expected) - expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) - filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything self.assert_frame_equal(filtered, expected) # pass in None From f562f7f5e251f78e171b925e3316b96e6ae14980 Mon Sep 17 00:00:00 2001 From: cmaher Date: Fri, 3 Jul 2015 15:35:29 -0700 Subject: [PATCH 15/15] Fix merge conflict --- pandas/tests/test_frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index fbe3a31e020a0..29c055899bbed 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10765,6 +10765,9 @@ def test_filter(self): expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1']) filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything + + expected = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '0', '1']) + filtered = expected.filter(regex='^[0-9]+$') # shouldn't remove anything self.assert_frame_equal(filtered, expected) # pass in None