From e55f060d95f2693dce74dc7d8daa48b281fe0ee0 Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Sat, 8 Jun 2019 04:07:21 +0300 Subject: [PATCH 1/6] TST: issue #26610. Tests and fix. --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/io/pytables.py | 4 ++-- pandas/tests/io/test_pytables.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3096a13ac0767..0b2d880f4d395 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -649,6 +649,7 @@ I/O - Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) - Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`) - :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`) +- Bug while selecting from HDF store with `where`='' specified (:issue:`26610`). Now the whole DataFrame will be returned from store. Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 53ef2395a302a..ca333f040aae9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -95,10 +95,10 @@ def _ensure_term(where, scope_level): wlist.append(w) else: wlist.append(Term(w, scope_level=level)) - where = wlist + where = wlist if wlist else None elif maybe_expression(where): where = Term(where, scope_level=level) - return where + return where if where != "" else None class PossibleDataLossError(Exception): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 8b5907b920cca..b2ac1f88cfd3a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4731,6 +4731,34 @@ def test_read_py2_hdf_file_in_py3(self, datapath): result = store['p'] assert_frame_equal(result, expected) + @pytest.mark.parametrize("call", [ + "read_hdf", "select", "select_as_coordinates", "select_as_multiple" + ]) + @pytest.mark.parametrize("where", ["", (), (None, ), [], [None]]) + def test_select_empty_where(self, call, where): + # GH26610 + + # Using keyword `where` as '' or (), or [None], etc + # while reading from HDF store raises + # "SyntaxError: only a single expression is allowed" + + df = pd.DataFrame([[1, 2], [1, 2]], columns=list("ab")) + with ensure_clean_path("empty_where.h5") as path: + with pd.HDFStore(path) as store: + key1 = "df1" + key2 = "df2" + store.put("df1", df[["a"]], "t") + store.put("df2", df[["b"]], "t") + if call == "read_hdf": + pd.read_hdf(store, key1, where=where) + elif call == "select": + store.select(key1, where=where) + elif call == "select_as_coordinates": + store.select_as_coordinates(key1, where=where) + elif call == "select_as_multiple": + store.select_as_multiple( + [key1, key2], where=where, selector=key1) + class TestHDFComplexValues(Base): # GH10447 From a7f0ad2ffbe3f731964e78bab946c2fd1f7bd472 Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Sun, 9 Jun 2019 04:11:12 +0300 Subject: [PATCH 2/6] Docstring fixed. Tests simplified. --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/tests/io/test_pytables.py | 21 ++++----------------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0b2d880f4d395..0bdbeaf760d22 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -649,7 +649,7 @@ I/O - Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) - Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`) - :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`) -- Bug while selecting from HDF store with `where`='' specified (:issue:`26610`). Now the whole DataFrame will be returned from store. +- Bug while selecting from :class: `pandas.HDFstore` with `where=''` specified (:issue:`26610`). Plotting ^^^^^^^^ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b2ac1f88cfd3a..79c49a922e052 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4731,9 +4731,6 @@ def test_read_py2_hdf_file_in_py3(self, datapath): result = store['p'] assert_frame_equal(result, expected) - @pytest.mark.parametrize("call", [ - "read_hdf", "select", "select_as_coordinates", "select_as_multiple" - ]) @pytest.mark.parametrize("where", ["", (), (None, ), [], [None]]) def test_select_empty_where(self, call, where): # GH26610 @@ -4742,22 +4739,12 @@ def test_select_empty_where(self, call, where): # while reading from HDF store raises # "SyntaxError: only a single expression is allowed" - df = pd.DataFrame([[1, 2], [1, 2]], columns=list("ab")) + df = pd.DataFrame([1, 2, 3]) with ensure_clean_path("empty_where.h5") as path: with pd.HDFStore(path) as store: - key1 = "df1" - key2 = "df2" - store.put("df1", df[["a"]], "t") - store.put("df2", df[["b"]], "t") - if call == "read_hdf": - pd.read_hdf(store, key1, where=where) - elif call == "select": - store.select(key1, where=where) - elif call == "select_as_coordinates": - store.select_as_coordinates(key1, where=where) - elif call == "select_as_multiple": - store.select_as_multiple( - [key1, key2], where=where, selector=key1) + store.put("df", df, "t") + result = pd.read_hdf(store, "df", where=where) + assert_frame_equal(result, df) class TestHDFComplexValues(Base): From 3b2a5c09cb5bd62625c187df1143cd2840955ec8 Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Sun, 9 Jun 2019 04:28:12 +0300 Subject: [PATCH 3/6] Test fixed. --- pandas/tests/io/test_pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 79c49a922e052..5c9c3ae46df23 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4732,7 +4732,7 @@ def test_read_py2_hdf_file_in_py3(self, datapath): assert_frame_equal(result, expected) @pytest.mark.parametrize("where", ["", (), (None, ), [], [None]]) - def test_select_empty_where(self, call, where): + def test_select_empty_where(self, where): # GH26610 # Using keyword `where` as '' or (), or [None], etc From f75ae8ca457d1c3ff991e5e56b933f0a29beb56f Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Mon, 10 Jun 2019 01:39:01 +0300 Subject: [PATCH 4/6] Requested changes applied. --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/io/pytables.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0bdbeaf760d22..edfd948b1a33e 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -649,7 +649,7 @@ I/O - Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) - Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`) - :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`) -- Bug while selecting from :class: `pandas.HDFstore` with `where=''` specified (:issue:`26610`). +- Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`). Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ca333f040aae9..b25edf14f497a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -95,10 +95,10 @@ def _ensure_term(where, scope_level): wlist.append(w) else: wlist.append(Term(w, scope_level=level)) - where = wlist if wlist else None + where = wlist elif maybe_expression(where): where = Term(where, scope_level=level) - return where if where != "" else None + return where if len(where) else None class PossibleDataLossError(Exception): From 8ae2517d53b6840e8da0d64bd56f17d48322ddbe Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Mon, 10 Jun 2019 02:12:11 +0300 Subject: [PATCH 5/6] Another try for where checking. --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b25edf14f497a..1be673ff5bd0f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -97,8 +97,8 @@ def _ensure_term(where, scope_level): wlist.append(Term(w, scope_level=level)) where = wlist elif maybe_expression(where): - where = Term(where, scope_level=level) - return where if len(where) else None + return Term(where, scope_level=level) + return where if where else None class PossibleDataLossError(Exception): From 89245dc561d37c5a94a0c86c7d0a2c37061cd541 Mon Sep 17 00:00:00 2001 From: BeforeFlight Date: Mon, 10 Jun 2019 16:00:36 +0300 Subject: [PATCH 6/6] Fix where is None clause. --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1be673ff5bd0f..983b1286eec91 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -97,8 +97,8 @@ def _ensure_term(where, scope_level): wlist.append(Term(w, scope_level=level)) where = wlist elif maybe_expression(where): - return Term(where, scope_level=level) - return where if where else None + where = Term(where, scope_level=level) + return where if where is None or len(where) else None class PossibleDataLossError(Exception):