From bb2d803da0b17988126355f3200166944cb64f19 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Fri, 22 Jan 2021 23:11:50 +0200 Subject: [PATCH 01/32] BUG: fix case of a category value which isn't exists (#39189) --- pandas/core/computation/pytables.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index db2385de06e93..6a3b95186d666 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -210,12 +210,10 @@ def stringify(value): return TermValue(int(v), v, kind) elif meta == "category": metadata = extract_array(self.metadata, extract_numpy=True) - result = metadata.searchsorted(v, side="left") - - # result returns 0 if v is first element or if v is not in metadata - # check that metadata contains v - if not result and v not in metadata: + if v not in metadata: result = -1 + else: + result = metadata.searchsorted(v, side="left") return TermValue(result, result, "integer") elif kind == "integer": v = int(float(v)) From f9be6256265643429a84a66e31e60f5db30ab08d Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 18:24:53 +0200 Subject: [PATCH 02/32] BUG: add UT to conver_value for this use case (#39189) --- pandas/tests/computation/test_pytables.py | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 pandas/tests/computation/test_pytables.py diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py new file mode 100644 index 0000000000000..91e424ed1d459 --- /dev/null +++ b/pandas/tests/computation/test_pytables.py @@ -0,0 +1,30 @@ +import pytest +from typing import Any +from unittest.mock import patch, PropertyMock +from pandas.core.computation.pytables import BinOp, TermValue +from pandas.core.series import Series + + +@patch('pandas.core.computation.pytables.BinOp.kind', new_callable=PropertyMock, return_value='integer') +@patch('pandas.core.computation.pytables.BinOp.meta', new_callable=PropertyMock, return_value='category') +@patch('pandas.core.computation.pytables.BinOp.metadata', new_callable=PropertyMock, return_value=Series(data=['a', 'b', 's'])) +@pytest.mark.parametrize("value, expected_results", + [('q', + TermValue(-1, -1, 'integer')), + ('a', + TermValue(0, 0, 'integer'))]) +def test_convert_value(mock_kind, + mock_meta, + mock_metadata, + value: Any, + expected_results: TermValue): + + with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): + bin_op = BinOp(None, None, None, None, None) + bin_op.encoding = 'UTF-8' + + result = bin_op.convert_value(value) + + assert result.kind == expected_results.kind and\ + result.value == expected_results.value and\ + result.converted == expected_results.converted From aa9044160b1e6c44f8eedd4f602725266ac72910 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 18:33:57 +0200 Subject: [PATCH 03/32] BUG: change style with pre-commit (#39189) --- pandas/tests/computation/test_pytables.py | 51 ++++++++++++++--------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index 91e424ed1d459..b6a9c6f6a1dd4 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,30 +1,43 @@ -import pytest from typing import Any -from unittest.mock import patch, PropertyMock +from unittest.mock import PropertyMock, patch + +import pytest + from pandas.core.computation.pytables import BinOp, TermValue from pandas.core.series import Series -@patch('pandas.core.computation.pytables.BinOp.kind', new_callable=PropertyMock, return_value='integer') -@patch('pandas.core.computation.pytables.BinOp.meta', new_callable=PropertyMock, return_value='category') -@patch('pandas.core.computation.pytables.BinOp.metadata', new_callable=PropertyMock, return_value=Series(data=['a', 'b', 's'])) -@pytest.mark.parametrize("value, expected_results", - [('q', - TermValue(-1, -1, 'integer')), - ('a', - TermValue(0, 0, 'integer'))]) -def test_convert_value(mock_kind, - mock_meta, - mock_metadata, - value: Any, - expected_results: TermValue): +@patch( + "pandas.core.computation.pytables.BinOp.kind", + new_callable=PropertyMock, + return_value="integer", +) +@patch( + "pandas.core.computation.pytables.BinOp.meta", + new_callable=PropertyMock, + return_value="category", +) +@patch( + "pandas.core.computation.pytables.BinOp.metadata", + new_callable=PropertyMock, + return_value=Series(data=["a", "b", "s"]), +) +@pytest.mark.parametrize( + "value, expected_results", + [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +) +def test_convert_value( + mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +): with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): bin_op = BinOp(None, None, None, None, None) - bin_op.encoding = 'UTF-8' + bin_op.encoding = "UTF-8" result = bin_op.convert_value(value) - assert result.kind == expected_results.kind and\ - result.value == expected_results.value and\ - result.converted == expected_results.converted + assert ( + result.kind == expected_results.kind + and result.value == expected_results.value + and result.converted == expected_results.converted + ) From e8ca3fc5e8bedc8915881791785ffb9acde48d96 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:26:08 +0200 Subject: [PATCH 04/32] BUG: add a whatsnew record (#39189) --- doc/source/whatsnew/v1.3.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 509265ca00544..b68a519143694 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -327,6 +327,9 @@ I/O - Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable ``boolean`` dtype (:issue:`34655`) - Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`) - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) +- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) + + Period ^^^^^^ From b5ded4925623ee8d49f4b7bb80aa233c569f5a1b Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:39:13 +0200 Subject: [PATCH 05/32] Trigger Build From 0cb8ad71ba89dcb249b1cbdaa2f8162908e14731 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:45:31 +0200 Subject: [PATCH 06/32] BUG: check for tests (#39189) --- pandas/tests/computation/test_pytables.py | 86 +++++++++++------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index b6a9c6f6a1dd4..709c587ac6107 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +1,43 @@ -from typing import Any -from unittest.mock import PropertyMock, patch - -import pytest - -from pandas.core.computation.pytables import BinOp, TermValue -from pandas.core.series import Series - - -@patch( - "pandas.core.computation.pytables.BinOp.kind", - new_callable=PropertyMock, - return_value="integer", -) -@patch( - "pandas.core.computation.pytables.BinOp.meta", - new_callable=PropertyMock, - return_value="category", -) -@patch( - "pandas.core.computation.pytables.BinOp.metadata", - new_callable=PropertyMock, - return_value=Series(data=["a", "b", "s"]), -) -@pytest.mark.parametrize( - "value, expected_results", - [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -) -def test_convert_value( - mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -): - - with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): - bin_op = BinOp(None, None, None, None, None) - bin_op.encoding = "UTF-8" - - result = bin_op.convert_value(value) - - assert ( - result.kind == expected_results.kind - and result.value == expected_results.value - and result.converted == expected_results.converted - ) +# from typing import Any +# from unittest.mock import PropertyMock, patch +# +# import pytest +# +# from pandas.core.computation.pytables import BinOp, TermValue +# from pandas.core.series import Series +# +# +# @patch( +# "pandas.core.computation.pytables.BinOp.kind", +# new_callable=PropertyMock, +# return_value="integer", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.meta", +# new_callable=PropertyMock, +# return_value="category", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.metadata", +# new_callable=PropertyMock, +# return_value=Series(data=["a", "b", "s"]), +# ) +# @pytest.mark.parametrize( +# "value, expected_results", +# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +# ) +# def test_convert_value( +# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +# ): +# +# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): +# bin_op = BinOp(None, None, None, None, None) +# bin_op.encoding = "UTF-8" +# +# result = bin_op.convert_value(value) +# +# assert ( +# result.kind == expected_results.kind +# and result.value == expected_results.value +# and result.converted == expected_results.converted +# ) From 8284e0b0a0e7468a7be673fef46f620d1bb36cd7 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:50:34 +0200 Subject: [PATCH 07/32] BUG: remove spaces (#39189) --- doc/source/whatsnew/v1.3.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b68a519143694..d0080a7d74ecd 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -329,8 +329,6 @@ I/O - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) - Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) - - Period ^^^^^^ - Comparisons of :class:`Period` objects or :class:`Index`, :class:`Series`, or :class:`DataFrame` with mismatched ``PeriodDtype`` now behave like other mismatched-type comparisons, returning ``False`` for equals, ``True`` for not-equal, and raising ``TypeError`` for inequality checks (:issue:`??`) From 9773aaa49ef4b4b352d865338591063164e5770d Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:52:30 +0200 Subject: [PATCH 08/32] BUG: remove whatsnew (#39189) --- doc/source/whatsnew/v1.3.0.rst | 1 - pandas/tests/computation/test_pytables.py | 86 +++++++++++------------ 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d0080a7d74ecd..509265ca00544 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -327,7 +327,6 @@ I/O - Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable ``boolean`` dtype (:issue:`34655`) - Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`) - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) -- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) Period ^^^^^^ diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index 709c587ac6107..b6a9c6f6a1dd4 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +1,43 @@ -# from typing import Any -# from unittest.mock import PropertyMock, patch -# -# import pytest -# -# from pandas.core.computation.pytables import BinOp, TermValue -# from pandas.core.series import Series -# -# -# @patch( -# "pandas.core.computation.pytables.BinOp.kind", -# new_callable=PropertyMock, -# return_value="integer", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.meta", -# new_callable=PropertyMock, -# return_value="category", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.metadata", -# new_callable=PropertyMock, -# return_value=Series(data=["a", "b", "s"]), -# ) -# @pytest.mark.parametrize( -# "value, expected_results", -# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -# ) -# def test_convert_value( -# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -# ): -# -# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): -# bin_op = BinOp(None, None, None, None, None) -# bin_op.encoding = "UTF-8" -# -# result = bin_op.convert_value(value) -# -# assert ( -# result.kind == expected_results.kind -# and result.value == expected_results.value -# and result.converted == expected_results.converted -# ) +from typing import Any +from unittest.mock import PropertyMock, patch + +import pytest + +from pandas.core.computation.pytables import BinOp, TermValue +from pandas.core.series import Series + + +@patch( + "pandas.core.computation.pytables.BinOp.kind", + new_callable=PropertyMock, + return_value="integer", +) +@patch( + "pandas.core.computation.pytables.BinOp.meta", + new_callable=PropertyMock, + return_value="category", +) +@patch( + "pandas.core.computation.pytables.BinOp.metadata", + new_callable=PropertyMock, + return_value=Series(data=["a", "b", "s"]), +) +@pytest.mark.parametrize( + "value, expected_results", + [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +) +def test_convert_value( + mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +): + + with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): + bin_op = BinOp(None, None, None, None, None) + bin_op.encoding = "UTF-8" + + result = bin_op.convert_value(value) + + assert ( + result.kind == expected_results.kind + and result.value == expected_results.value + and result.converted == expected_results.converted + ) From 4281ef0cb25312992ac5b91db01e350e6dab3d80 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:54:27 +0200 Subject: [PATCH 09/32] BUG: remove tests(#39189) --- pandas/tests/computation/test_pytables.py | 86 +++++++++++------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index b6a9c6f6a1dd4..709c587ac6107 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +1,43 @@ -from typing import Any -from unittest.mock import PropertyMock, patch - -import pytest - -from pandas.core.computation.pytables import BinOp, TermValue -from pandas.core.series import Series - - -@patch( - "pandas.core.computation.pytables.BinOp.kind", - new_callable=PropertyMock, - return_value="integer", -) -@patch( - "pandas.core.computation.pytables.BinOp.meta", - new_callable=PropertyMock, - return_value="category", -) -@patch( - "pandas.core.computation.pytables.BinOp.metadata", - new_callable=PropertyMock, - return_value=Series(data=["a", "b", "s"]), -) -@pytest.mark.parametrize( - "value, expected_results", - [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -) -def test_convert_value( - mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -): - - with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): - bin_op = BinOp(None, None, None, None, None) - bin_op.encoding = "UTF-8" - - result = bin_op.convert_value(value) - - assert ( - result.kind == expected_results.kind - and result.value == expected_results.value - and result.converted == expected_results.converted - ) +# from typing import Any +# from unittest.mock import PropertyMock, patch +# +# import pytest +# +# from pandas.core.computation.pytables import BinOp, TermValue +# from pandas.core.series import Series +# +# +# @patch( +# "pandas.core.computation.pytables.BinOp.kind", +# new_callable=PropertyMock, +# return_value="integer", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.meta", +# new_callable=PropertyMock, +# return_value="category", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.metadata", +# new_callable=PropertyMock, +# return_value=Series(data=["a", "b", "s"]), +# ) +# @pytest.mark.parametrize( +# "value, expected_results", +# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +# ) +# def test_convert_value( +# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +# ): +# +# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): +# bin_op = BinOp(None, None, None, None, None) +# bin_op.encoding = "UTF-8" +# +# result = bin_op.convert_value(value) +# +# assert ( +# result.kind == expected_results.kind +# and result.value == expected_results.value +# and result.converted == expected_results.converted +# ) From 7178757953fee242bb99f39c07fbc8408c61ba20 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:59:04 +0200 Subject: [PATCH 10/32] BUG: add whats new (#39189) --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 509265ca00544..d0080a7d74ecd 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -327,6 +327,7 @@ I/O - Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable ``boolean`` dtype (:issue:`34655`) - Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`) - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) +- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) Period ^^^^^^ From ca9420e71d7d137cbaf537561f4154e4b4738955 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:59:26 +0200 Subject: [PATCH 11/32] BUG: check tests (#39189) --- pandas/tests/computation/test_pytables.py | 43 ----------------------- 1 file changed, 43 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index 709c587ac6107..e69de29bb2d1d 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +0,0 @@ -# from typing import Any -# from unittest.mock import PropertyMock, patch -# -# import pytest -# -# from pandas.core.computation.pytables import BinOp, TermValue -# from pandas.core.series import Series -# -# -# @patch( -# "pandas.core.computation.pytables.BinOp.kind", -# new_callable=PropertyMock, -# return_value="integer", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.meta", -# new_callable=PropertyMock, -# return_value="category", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.metadata", -# new_callable=PropertyMock, -# return_value=Series(data=["a", "b", "s"]), -# ) -# @pytest.mark.parametrize( -# "value, expected_results", -# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -# ) -# def test_convert_value( -# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -# ): -# -# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): -# bin_op = BinOp(None, None, None, None, None) -# bin_op.encoding = "UTF-8" -# -# result = bin_op.convert_value(value) -# -# assert ( -# result.kind == expected_results.kind -# and result.value == expected_results.value -# and result.converted == expected_results.converted -# ) From f61b7c50fc20d42b3e4df19e4d398618bf6c892b Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Tue, 26 Jan 2021 21:29:34 +0200 Subject: [PATCH 12/32] BUG: update tests (#39189) --- pandas/core/computation/pytables.py | 18 ++++++++++++------ pandas/tests/computation/test_pytables.py | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 6a3b95186d666..a330490f96990 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -11,6 +11,7 @@ from pandas.compat.chainmap import DeepChainMap from pandas.core.dtypes.common import is_list_like +from pandas.core.series import Series import pandas.core.common as com from pandas.core.computation import expr, ops, scope as _scope @@ -209,12 +210,8 @@ def stringify(value): v = Timedelta(v, unit="s").value return TermValue(int(v), v, kind) elif meta == "category": - metadata = extract_array(self.metadata, extract_numpy=True) - if v not in metadata: - result = -1 - else: - result = metadata.searchsorted(v, side="left") - return TermValue(result, result, "integer") + term_value = self._convert_category_value(self.metadata, v) + return term_value elif kind == "integer": v = int(float(v)) return TermValue(v, v, kind) @@ -243,6 +240,15 @@ def stringify(value): else: raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column") + @staticmethod + def _convert_category_value(metadata: Series, value: Any) -> TermValue: + metadata = extract_array(metadata, extract_numpy=True) + if value not in metadata: + result = -1 + else: + result = metadata.searchsorted(value, side="left") + return TermValue(result, result, "integer") + def convert_values(self): pass diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index e69de29bb2d1d..c4f1ad08f38cd 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -0,0 +1,22 @@ +from typing import Any +import pytest +import numpy as np + +from pandas.core.computation.pytables import TermValue, BinOp +from pandas import Series + + +@pytest.mark.parametrize( + "value, expected_results", + [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +) +def test__convert_value(value: Any, expected_results: TermValue): + metadata = Series(np.array(['a', 'b', 's'])) + + result = BinOp._convert_category_value(metadata, value) + + assert ( + result.kind == expected_results.kind + and result.value == expected_results.value + and result.converted == expected_results.converted + ) \ No newline at end of file From 8c3b3b68b7e30eeb52d58683e0745856069d370d Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Tue, 26 Jan 2021 21:33:50 +0200 Subject: [PATCH 13/32] BUG: update after precommit (#39189) --- pandas/tests/computation/test_pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index c4f1ad08f38cd..adf9df195002f 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -11,7 +11,7 @@ [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], ) def test__convert_value(value: Any, expected_results: TermValue): - metadata = Series(np.array(['a', 'b', 's'])) + metadata = Series(np.array(["a", "b", "s"])) result = BinOp._convert_category_value(metadata, value) @@ -19,4 +19,4 @@ def test__convert_value(value: Any, expected_results: TermValue): result.kind == expected_results.kind and result.value == expected_results.value and result.converted == expected_results.converted - ) \ No newline at end of file + ) From 4e3bce264e2310ad515f8a77fe6b7139609be0ce Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Tue, 26 Jan 2021 21:34:37 +0200 Subject: [PATCH 14/32] BUG: update after precommit (#39189) --- pandas/core/computation/pytables.py | 2 +- pandas/tests/computation/test_pytables.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index a330490f96990..e10c0eb596bbf 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -11,7 +11,6 @@ from pandas.compat.chainmap import DeepChainMap from pandas.core.dtypes.common import is_list_like -from pandas.core.series import Series import pandas.core.common as com from pandas.core.computation import expr, ops, scope as _scope @@ -20,6 +19,7 @@ from pandas.core.computation.ops import UndefinedVariableError, is_term from pandas.core.construction import extract_array from pandas.core.indexes.base import Index +from pandas.core.series import Series from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index adf9df195002f..a60fec1c606d7 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,9 +1,10 @@ from typing import Any -import pytest + import numpy as np +import pytest -from pandas.core.computation.pytables import TermValue, BinOp from pandas import Series +from pandas.core.computation.pytables import BinOp, TermValue @pytest.mark.parametrize( From 558a585af916e2aeebacb0a975dc4696298681ee Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Fri, 22 Jan 2021 23:11:50 +0200 Subject: [PATCH 15/32] BUG: fix case of a category value which isn't exists (#39189) --- pandas/core/computation/pytables.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index db2385de06e93..6a3b95186d666 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -210,12 +210,10 @@ def stringify(value): return TermValue(int(v), v, kind) elif meta == "category": metadata = extract_array(self.metadata, extract_numpy=True) - result = metadata.searchsorted(v, side="left") - - # result returns 0 if v is first element or if v is not in metadata - # check that metadata contains v - if not result and v not in metadata: + if v not in metadata: result = -1 + else: + result = metadata.searchsorted(v, side="left") return TermValue(result, result, "integer") elif kind == "integer": v = int(float(v)) From adfe6009db75afa2397bdd3346f34e4306dff0f9 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 18:24:53 +0200 Subject: [PATCH 16/32] BUG: add UT to conver_value for this use case (#39189) --- pandas/tests/computation/test_pytables.py | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 pandas/tests/computation/test_pytables.py diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py new file mode 100644 index 0000000000000..91e424ed1d459 --- /dev/null +++ b/pandas/tests/computation/test_pytables.py @@ -0,0 +1,30 @@ +import pytest +from typing import Any +from unittest.mock import patch, PropertyMock +from pandas.core.computation.pytables import BinOp, TermValue +from pandas.core.series import Series + + +@patch('pandas.core.computation.pytables.BinOp.kind', new_callable=PropertyMock, return_value='integer') +@patch('pandas.core.computation.pytables.BinOp.meta', new_callable=PropertyMock, return_value='category') +@patch('pandas.core.computation.pytables.BinOp.metadata', new_callable=PropertyMock, return_value=Series(data=['a', 'b', 's'])) +@pytest.mark.parametrize("value, expected_results", + [('q', + TermValue(-1, -1, 'integer')), + ('a', + TermValue(0, 0, 'integer'))]) +def test_convert_value(mock_kind, + mock_meta, + mock_metadata, + value: Any, + expected_results: TermValue): + + with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): + bin_op = BinOp(None, None, None, None, None) + bin_op.encoding = 'UTF-8' + + result = bin_op.convert_value(value) + + assert result.kind == expected_results.kind and\ + result.value == expected_results.value and\ + result.converted == expected_results.converted From 63815c701ee8f62323ab2c3d93e6bf8a793c4958 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 18:33:57 +0200 Subject: [PATCH 17/32] BUG: change style with pre-commit (#39189) --- pandas/tests/computation/test_pytables.py | 51 ++++++++++++++--------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index 91e424ed1d459..b6a9c6f6a1dd4 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,30 +1,43 @@ -import pytest from typing import Any -from unittest.mock import patch, PropertyMock +from unittest.mock import PropertyMock, patch + +import pytest + from pandas.core.computation.pytables import BinOp, TermValue from pandas.core.series import Series -@patch('pandas.core.computation.pytables.BinOp.kind', new_callable=PropertyMock, return_value='integer') -@patch('pandas.core.computation.pytables.BinOp.meta', new_callable=PropertyMock, return_value='category') -@patch('pandas.core.computation.pytables.BinOp.metadata', new_callable=PropertyMock, return_value=Series(data=['a', 'b', 's'])) -@pytest.mark.parametrize("value, expected_results", - [('q', - TermValue(-1, -1, 'integer')), - ('a', - TermValue(0, 0, 'integer'))]) -def test_convert_value(mock_kind, - mock_meta, - mock_metadata, - value: Any, - expected_results: TermValue): +@patch( + "pandas.core.computation.pytables.BinOp.kind", + new_callable=PropertyMock, + return_value="integer", +) +@patch( + "pandas.core.computation.pytables.BinOp.meta", + new_callable=PropertyMock, + return_value="category", +) +@patch( + "pandas.core.computation.pytables.BinOp.metadata", + new_callable=PropertyMock, + return_value=Series(data=["a", "b", "s"]), +) +@pytest.mark.parametrize( + "value, expected_results", + [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +) +def test_convert_value( + mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +): with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): bin_op = BinOp(None, None, None, None, None) - bin_op.encoding = 'UTF-8' + bin_op.encoding = "UTF-8" result = bin_op.convert_value(value) - assert result.kind == expected_results.kind and\ - result.value == expected_results.value and\ - result.converted == expected_results.converted + assert ( + result.kind == expected_results.kind + and result.value == expected_results.value + and result.converted == expected_results.converted + ) From 74c687a70f0e15955fea131c76ae237db927d525 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:26:08 +0200 Subject: [PATCH 18/32] BUG: add a whatsnew record (#39189) --- doc/source/whatsnew/v1.3.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1dcde2000fc89..1c72b2637d74d 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -330,6 +330,9 @@ I/O - Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable ``boolean`` dtype (:issue:`34655`) - Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`) - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) +- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) + + Period ^^^^^^ From 3023fc088f20822e163b3883783387e75efdd19a Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:45:31 +0200 Subject: [PATCH 19/32] BUG: check for tests (#39189) --- pandas/tests/computation/test_pytables.py | 86 +++++++++++------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index b6a9c6f6a1dd4..709c587ac6107 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +1,43 @@ -from typing import Any -from unittest.mock import PropertyMock, patch - -import pytest - -from pandas.core.computation.pytables import BinOp, TermValue -from pandas.core.series import Series - - -@patch( - "pandas.core.computation.pytables.BinOp.kind", - new_callable=PropertyMock, - return_value="integer", -) -@patch( - "pandas.core.computation.pytables.BinOp.meta", - new_callable=PropertyMock, - return_value="category", -) -@patch( - "pandas.core.computation.pytables.BinOp.metadata", - new_callable=PropertyMock, - return_value=Series(data=["a", "b", "s"]), -) -@pytest.mark.parametrize( - "value, expected_results", - [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -) -def test_convert_value( - mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -): - - with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): - bin_op = BinOp(None, None, None, None, None) - bin_op.encoding = "UTF-8" - - result = bin_op.convert_value(value) - - assert ( - result.kind == expected_results.kind - and result.value == expected_results.value - and result.converted == expected_results.converted - ) +# from typing import Any +# from unittest.mock import PropertyMock, patch +# +# import pytest +# +# from pandas.core.computation.pytables import BinOp, TermValue +# from pandas.core.series import Series +# +# +# @patch( +# "pandas.core.computation.pytables.BinOp.kind", +# new_callable=PropertyMock, +# return_value="integer", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.meta", +# new_callable=PropertyMock, +# return_value="category", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.metadata", +# new_callable=PropertyMock, +# return_value=Series(data=["a", "b", "s"]), +# ) +# @pytest.mark.parametrize( +# "value, expected_results", +# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +# ) +# def test_convert_value( +# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +# ): +# +# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): +# bin_op = BinOp(None, None, None, None, None) +# bin_op.encoding = "UTF-8" +# +# result = bin_op.convert_value(value) +# +# assert ( +# result.kind == expected_results.kind +# and result.value == expected_results.value +# and result.converted == expected_results.converted +# ) From f917ba99f785eff469d28d5a30081e1b5713f6b3 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:50:34 +0200 Subject: [PATCH 20/32] BUG: remove spaces (#39189) --- doc/source/whatsnew/v1.3.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1c72b2637d74d..3be38e123b5d5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -332,8 +332,6 @@ I/O - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) - Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) - - Period ^^^^^^ - Comparisons of :class:`Period` objects or :class:`Index`, :class:`Series`, or :class:`DataFrame` with mismatched ``PeriodDtype`` now behave like other mismatched-type comparisons, returning ``False`` for equals, ``True`` for not-equal, and raising ``TypeError`` for inequality checks (:issue:`39274`) From 0abe19255c43517df594f7f6c23795b320088a26 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:52:30 +0200 Subject: [PATCH 21/32] BUG: remove whatsnew (#39189) --- doc/source/whatsnew/v1.3.0.rst | 1 - pandas/tests/computation/test_pytables.py | 86 +++++++++++------------ 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 3be38e123b5d5..1dcde2000fc89 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -330,7 +330,6 @@ I/O - Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable ``boolean`` dtype (:issue:`34655`) - Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`) - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) -- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) Period ^^^^^^ diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index 709c587ac6107..b6a9c6f6a1dd4 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +1,43 @@ -# from typing import Any -# from unittest.mock import PropertyMock, patch -# -# import pytest -# -# from pandas.core.computation.pytables import BinOp, TermValue -# from pandas.core.series import Series -# -# -# @patch( -# "pandas.core.computation.pytables.BinOp.kind", -# new_callable=PropertyMock, -# return_value="integer", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.meta", -# new_callable=PropertyMock, -# return_value="category", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.metadata", -# new_callable=PropertyMock, -# return_value=Series(data=["a", "b", "s"]), -# ) -# @pytest.mark.parametrize( -# "value, expected_results", -# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -# ) -# def test_convert_value( -# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -# ): -# -# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): -# bin_op = BinOp(None, None, None, None, None) -# bin_op.encoding = "UTF-8" -# -# result = bin_op.convert_value(value) -# -# assert ( -# result.kind == expected_results.kind -# and result.value == expected_results.value -# and result.converted == expected_results.converted -# ) +from typing import Any +from unittest.mock import PropertyMock, patch + +import pytest + +from pandas.core.computation.pytables import BinOp, TermValue +from pandas.core.series import Series + + +@patch( + "pandas.core.computation.pytables.BinOp.kind", + new_callable=PropertyMock, + return_value="integer", +) +@patch( + "pandas.core.computation.pytables.BinOp.meta", + new_callable=PropertyMock, + return_value="category", +) +@patch( + "pandas.core.computation.pytables.BinOp.metadata", + new_callable=PropertyMock, + return_value=Series(data=["a", "b", "s"]), +) +@pytest.mark.parametrize( + "value, expected_results", + [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +) +def test_convert_value( + mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +): + + with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): + bin_op = BinOp(None, None, None, None, None) + bin_op.encoding = "UTF-8" + + result = bin_op.convert_value(value) + + assert ( + result.kind == expected_results.kind + and result.value == expected_results.value + and result.converted == expected_results.converted + ) From 1b959eedd5f7212165e2606a3751160dbe86072b Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:54:27 +0200 Subject: [PATCH 22/32] BUG: remove tests(#39189) --- pandas/tests/computation/test_pytables.py | 86 +++++++++++------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index b6a9c6f6a1dd4..709c587ac6107 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +1,43 @@ -from typing import Any -from unittest.mock import PropertyMock, patch - -import pytest - -from pandas.core.computation.pytables import BinOp, TermValue -from pandas.core.series import Series - - -@patch( - "pandas.core.computation.pytables.BinOp.kind", - new_callable=PropertyMock, - return_value="integer", -) -@patch( - "pandas.core.computation.pytables.BinOp.meta", - new_callable=PropertyMock, - return_value="category", -) -@patch( - "pandas.core.computation.pytables.BinOp.metadata", - new_callable=PropertyMock, - return_value=Series(data=["a", "b", "s"]), -) -@pytest.mark.parametrize( - "value, expected_results", - [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -) -def test_convert_value( - mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -): - - with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): - bin_op = BinOp(None, None, None, None, None) - bin_op.encoding = "UTF-8" - - result = bin_op.convert_value(value) - - assert ( - result.kind == expected_results.kind - and result.value == expected_results.value - and result.converted == expected_results.converted - ) +# from typing import Any +# from unittest.mock import PropertyMock, patch +# +# import pytest +# +# from pandas.core.computation.pytables import BinOp, TermValue +# from pandas.core.series import Series +# +# +# @patch( +# "pandas.core.computation.pytables.BinOp.kind", +# new_callable=PropertyMock, +# return_value="integer", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.meta", +# new_callable=PropertyMock, +# return_value="category", +# ) +# @patch( +# "pandas.core.computation.pytables.BinOp.metadata", +# new_callable=PropertyMock, +# return_value=Series(data=["a", "b", "s"]), +# ) +# @pytest.mark.parametrize( +# "value, expected_results", +# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +# ) +# def test_convert_value( +# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue +# ): +# +# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): +# bin_op = BinOp(None, None, None, None, None) +# bin_op.encoding = "UTF-8" +# +# result = bin_op.convert_value(value) +# +# assert ( +# result.kind == expected_results.kind +# and result.value == expected_results.value +# and result.converted == expected_results.converted +# ) From 4de349fe0566756ea492f67dd848570d49ae0bae Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:59:04 +0200 Subject: [PATCH 23/32] BUG: add whats new (#39189) --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1dcde2000fc89..3be38e123b5d5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -330,6 +330,7 @@ I/O - Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable ``boolean`` dtype (:issue:`34655`) - Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`) - :meth:`read_sql` returned an empty generator if ``chunksize`` was no-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) +- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) Period ^^^^^^ From d7a3ef66a81a950953f41cb510d653f7b4dc28ef Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 23 Jan 2021 19:59:26 +0200 Subject: [PATCH 24/32] BUG: check tests (#39189) --- pandas/tests/computation/test_pytables.py | 43 ----------------------- 1 file changed, 43 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index 709c587ac6107..e69de29bb2d1d 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,43 +0,0 @@ -# from typing import Any -# from unittest.mock import PropertyMock, patch -# -# import pytest -# -# from pandas.core.computation.pytables import BinOp, TermValue -# from pandas.core.series import Series -# -# -# @patch( -# "pandas.core.computation.pytables.BinOp.kind", -# new_callable=PropertyMock, -# return_value="integer", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.meta", -# new_callable=PropertyMock, -# return_value="category", -# ) -# @patch( -# "pandas.core.computation.pytables.BinOp.metadata", -# new_callable=PropertyMock, -# return_value=Series(data=["a", "b", "s"]), -# ) -# @pytest.mark.parametrize( -# "value, expected_results", -# [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -# ) -# def test_convert_value( -# mock_kind, mock_meta, mock_metadata, value: Any, expected_results: TermValue -# ): -# -# with patch.object(BinOp, "__init__", lambda p1, p2, p3, p4, p5, p6: None): -# bin_op = BinOp(None, None, None, None, None) -# bin_op.encoding = "UTF-8" -# -# result = bin_op.convert_value(value) -# -# assert ( -# result.kind == expected_results.kind -# and result.value == expected_results.value -# and result.converted == expected_results.converted -# ) From eb8cd5abbb7d0c7847e75f1c6f5da4e23c0a12ea Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Tue, 26 Jan 2021 21:29:34 +0200 Subject: [PATCH 25/32] BUG: update tests (#39189) --- pandas/core/computation/pytables.py | 18 ++++++++++++------ pandas/tests/computation/test_pytables.py | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 6a3b95186d666..a330490f96990 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -11,6 +11,7 @@ from pandas.compat.chainmap import DeepChainMap from pandas.core.dtypes.common import is_list_like +from pandas.core.series import Series import pandas.core.common as com from pandas.core.computation import expr, ops, scope as _scope @@ -209,12 +210,8 @@ def stringify(value): v = Timedelta(v, unit="s").value return TermValue(int(v), v, kind) elif meta == "category": - metadata = extract_array(self.metadata, extract_numpy=True) - if v not in metadata: - result = -1 - else: - result = metadata.searchsorted(v, side="left") - return TermValue(result, result, "integer") + term_value = self._convert_category_value(self.metadata, v) + return term_value elif kind == "integer": v = int(float(v)) return TermValue(v, v, kind) @@ -243,6 +240,15 @@ def stringify(value): else: raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column") + @staticmethod + def _convert_category_value(metadata: Series, value: Any) -> TermValue: + metadata = extract_array(metadata, extract_numpy=True) + if value not in metadata: + result = -1 + else: + result = metadata.searchsorted(value, side="left") + return TermValue(result, result, "integer") + def convert_values(self): pass diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index e69de29bb2d1d..c4f1ad08f38cd 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -0,0 +1,22 @@ +from typing import Any +import pytest +import numpy as np + +from pandas.core.computation.pytables import TermValue, BinOp +from pandas import Series + + +@pytest.mark.parametrize( + "value, expected_results", + [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], +) +def test__convert_value(value: Any, expected_results: TermValue): + metadata = Series(np.array(['a', 'b', 's'])) + + result = BinOp._convert_category_value(metadata, value) + + assert ( + result.kind == expected_results.kind + and result.value == expected_results.value + and result.converted == expected_results.converted + ) \ No newline at end of file From 235d05eee2b8eec30ba784e484e53d255b91dea6 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Tue, 26 Jan 2021 21:33:50 +0200 Subject: [PATCH 26/32] BUG: update after precommit (#39189) --- pandas/tests/computation/test_pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index c4f1ad08f38cd..adf9df195002f 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -11,7 +11,7 @@ [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], ) def test__convert_value(value: Any, expected_results: TermValue): - metadata = Series(np.array(['a', 'b', 's'])) + metadata = Series(np.array(["a", "b", "s"])) result = BinOp._convert_category_value(metadata, value) @@ -19,4 +19,4 @@ def test__convert_value(value: Any, expected_results: TermValue): result.kind == expected_results.kind and result.value == expected_results.value and result.converted == expected_results.converted - ) \ No newline at end of file + ) From 73541ff527e47141d7d788ae581e62b8e6757bf0 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Tue, 26 Jan 2021 21:34:37 +0200 Subject: [PATCH 27/32] BUG: update after precommit (#39189) --- pandas/core/computation/pytables.py | 2 +- pandas/tests/computation/test_pytables.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index a330490f96990..e10c0eb596bbf 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -11,7 +11,6 @@ from pandas.compat.chainmap import DeepChainMap from pandas.core.dtypes.common import is_list_like -from pandas.core.series import Series import pandas.core.common as com from pandas.core.computation import expr, ops, scope as _scope @@ -20,6 +19,7 @@ from pandas.core.computation.ops import UndefinedVariableError, is_term from pandas.core.construction import extract_array from pandas.core.indexes.base import Index +from pandas.core.series import Series from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py index adf9df195002f..a60fec1c606d7 100644 --- a/pandas/tests/computation/test_pytables.py +++ b/pandas/tests/computation/test_pytables.py @@ -1,9 +1,10 @@ from typing import Any -import pytest + import numpy as np +import pytest -from pandas.core.computation.pytables import TermValue, BinOp from pandas import Series +from pandas.core.computation.pytables import BinOp, TermValue @pytest.mark.parametrize( From 017e47a019765c205b6d2d01825c15d7e659c9d0 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 30 Jan 2021 16:31:27 +0200 Subject: [PATCH 28/32] BUG: change test location (#39189) --- pandas/core/computation/pytables.py | 17 +++++---------- pandas/tests/computation/test_pytables.py | 23 -------------------- pandas/tests/io/pytables/test_categorical.py | 16 ++++++++++++++ 3 files changed, 22 insertions(+), 34 deletions(-) delete mode 100644 pandas/tests/computation/test_pytables.py diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index e10c0eb596bbf..826332de0f530 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -210,8 +210,12 @@ def stringify(value): v = Timedelta(v, unit="s").value return TermValue(int(v), v, kind) elif meta == "category": - term_value = self._convert_category_value(self.metadata, v) - return term_value + metadata = extract_array(self.metadata, extract_numpy=True) + if v not in metadata: + result = -1 + else: + result = metadata.searchsorted(v, side="left") + return TermValue(result, result, "integer") elif kind == "integer": v = int(float(v)) return TermValue(v, v, kind) @@ -240,15 +244,6 @@ def stringify(value): else: raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column") - @staticmethod - def _convert_category_value(metadata: Series, value: Any) -> TermValue: - metadata = extract_array(metadata, extract_numpy=True) - if value not in metadata: - result = -1 - else: - result = metadata.searchsorted(value, side="left") - return TermValue(result, result, "integer") - def convert_values(self): pass diff --git a/pandas/tests/computation/test_pytables.py b/pandas/tests/computation/test_pytables.py deleted file mode 100644 index a60fec1c606d7..0000000000000 --- a/pandas/tests/computation/test_pytables.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Any - -import numpy as np -import pytest - -from pandas import Series -from pandas.core.computation.pytables import BinOp, TermValue - - -@pytest.mark.parametrize( - "value, expected_results", - [("q", TermValue(-1, -1, "integer")), ("a", TermValue(0, 0, "integer"))], -) -def test__convert_value(value: Any, expected_results: TermValue): - metadata = Series(np.array(["a", "b", "s"])) - - result = BinOp._convert_category_value(metadata, value) - - assert ( - result.kind == expected_results.kind - and result.value == expected_results.value - and result.converted == expected_results.converted - ) diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index 67209c2bc0d57..89b8bd382c7cf 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -184,3 +184,19 @@ def test_categorical_nan_only_columns(setup_path): df.to_hdf(path, "df", format="table", data_columns=True) result = read_hdf(path, "df") tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "value, expected_results", + [ + ('col=="q"', DataFrame({"col": ["a", "b", "s"]}, DataFrame({"col": []}))), + ('col=="a"', DataFrame({"col": ["a", "b", "s"]}, DataFrame({"col": ["a"]}))), + ], +) +def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFrame): + # GH39420 + # Check that read_hdf with categorical columns can filter by where condition. + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + result = read_hdf(path, "df", where=where) + tm.assert_frame_equal(result, expected) From d67ff95d7b862bab99da46e4606c1f157dd8f4ce Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 30 Jan 2021 16:33:41 +0200 Subject: [PATCH 29/32] BUG: remove import (#39189) --- pandas/core/computation/pytables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 826332de0f530..6a3b95186d666 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -19,7 +19,6 @@ from pandas.core.computation.ops import UndefinedVariableError, is_term from pandas.core.construction import extract_array from pandas.core.indexes.base import Index -from pandas.core.series import Series from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded From 37eef60230ee726ec8edaddc02e07bc23c10f4b7 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 30 Jan 2021 18:01:45 +0200 Subject: [PATCH 30/32] BUG: remove import (#39189) --- pandas/tests/io/pytables/test_categorical.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index 89b8bd382c7cf..b4f85cdb71a1c 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -187,16 +187,22 @@ def test_categorical_nan_only_columns(setup_path): @pytest.mark.parametrize( - "value, expected_results", + "where, df, expected", [ - ('col=="q"', DataFrame({"col": ["a", "b", "s"]}, DataFrame({"col": []}))), - ('col=="a"', DataFrame({"col": ["a", "b", "s"]}, DataFrame({"col": ["a"]}))), + ('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})), + ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})), ], ) def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFrame): # GH39420 # Check that read_hdf with categorical columns can filter by where condition. + df.col = df.col.astype("category") + max_widths = {"col": 1} + categorical_values = list(sorted(list(df.col.unique()))) + expected.col = expected.col.astype("category") + expected.col.cat.set_categories(categorical_values, inplace=True) + with ensure_clean_path(setup_path) as path: - df.to_hdf(path, "df", format="table") - result = read_hdf(path, "df", where=where) + df.to_hdf(path, "df", format="table", min_itemsize=max_widths) + result = read_hdf(path, where=where) tm.assert_frame_equal(result, expected) From b3565af8eb285892d59eec7c6ad5f4511681577c Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 30 Jan 2021 18:09:00 +0200 Subject: [PATCH 31/32] BUG: remove list() before sorted() (#39189) --- pandas/tests/io/pytables/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index b4f85cdb71a1c..6e52adc739710 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -198,7 +198,7 @@ def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFram # Check that read_hdf with categorical columns can filter by where condition. df.col = df.col.astype("category") max_widths = {"col": 1} - categorical_values = list(sorted(list(df.col.unique()))) + categorical_values = sorted(list(df.col.unique())) expected.col = expected.col.astype("category") expected.col.cat.set_categories(categorical_values, inplace=True) From 5af7c04e4f1d15d3106ead50752a30bb986d63f9 Mon Sep 17 00:00:00 2001 From: nofarmishraki Date: Sat, 30 Jan 2021 18:20:14 +0200 Subject: [PATCH 32/32] BUG: remove list() in sorted() (#39189) --- pandas/tests/io/pytables/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index 6e52adc739710..b873811de616c 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -198,7 +198,7 @@ def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFram # Check that read_hdf with categorical columns can filter by where condition. df.col = df.col.astype("category") max_widths = {"col": 1} - categorical_values = sorted(list(df.col.unique())) + categorical_values = sorted(df.col.unique()) expected.col = expected.col.astype("category") expected.col.cat.set_categories(categorical_values, inplace=True)