From 19dc3042a25cf7ab5e54b77fa84f059c4971f244 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 12 Jun 2019 22:03:07 +0200 Subject: [PATCH 01/26] TST: add TestReadPyTablesHDF5 test scaffold --- pandas/tests/io/test_pytables.py | 84 ++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 299c0feb502be..cdc4d6b33dbc0 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5,6 +5,7 @@ from io import BytesIO import os import tempfile +import time from warnings import catch_warnings, simplefilter import numpy as np @@ -5177,3 +5178,86 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) + + +class TestReadPyTablesHDF5(Base): + """ + A group of tests which covers reading HDF5 files written by plain PyTables + (not written by pandas). + """ + + def _create_simple_hdf5_file_with_pytables(self): + + table_schema = { + 'c0': tables.Time64Col(pos=0), + 'c1': tables.StringCol(5, pos=1), + 'c2': tables.UInt32Col(pos=2), + } + + t0 = time.time() + + testsamples = [ + {'c0': t0, 'c1': 'aaaaa', 'c2': 1}, + {'c0': t0 + 1, 'c1': 'bbbbb', 'c2': 2}, + {'c0': t0 + 2, 'c1': 'ccccc', 'c2': 10**5}, + {'c0': t0 + 3, 'c1': 'ddddd', 'c2': 4294967295}, + ] + + # This returns a path and does not open the file. + tmpfilepath = create_tempfile(self.path) + objectname = 'pandas_test_timeseries' + + with tables.open_file(tmpfilepath, mode='w') as hf: + t = hf.create_table('/', name=objectname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() + + return tmpfilepath, objectname, testsamples + + def _compare(self, df, samples): + """Compare the reference `samples` with the contents in DataFrame `df`. + """ + for idx, row in df.iterrows(): + # Compare Time64Col values with tolerance. + tm.assert_almost_equal(samples[idx]['c0'], row['c0']) + + # Compare a short string. + assert samples[idx]['c1'] == row['c1'] + + # Compare an unsigned 32 bit integer. + assert samples[idx]['c2'] == row['c2'] + + def test_read_complete(self): + path, objname, samples = self._create_simple_hdf5_file_with_pytables() + + df = pd.read_hdf(path, key=objname) + self._compare(df, samples) + + def test_read_with_start(self): + path, objname, samples = self._create_simple_hdf5_file_with_pytables() + + # Currently this fails as of + # https://github.com/pandas-dev/pandas/issues/11188 + with pytest.raises(ValueError, match='Shape of passed values is'): + df = pd.read_hdf(path, key=objname, start=1) + self._compare(df, samples[1:]) + + def test_read_with_stop(self): + path, objname, samples = self._create_simple_hdf5_file_with_pytables() + + # Currently this fails as of + # https://github.com/pandas-dev/pandas/issues/11188 + with pytest.raises(ValueError, match='Shape of passed values is'): + df = pd.read_hdf(path, key=objname, stop=1) + self._compare(df, samples[0:1]) + + def test_read_with_startstop(self): + path, objname, samples = self._create_simple_hdf5_file_with_pytables() + + # Currently this fails as of + # https://github.com/pandas-dev/pandas/issues/11188 + with pytest.raises(ValueError, match='Shape of passed values is'): + df = pd.read_hdf(path, key=objname, start=1, stop=2) + self._compare(df, samples[1:2]) From e9c7c39ddadd9ea3a6cf97cbe60aad288783c015 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 12 Jun 2019 23:27:46 +0200 Subject: [PATCH 02/26] BUG: this fixes #11188 --- pandas/io/pytables.py | 16 +++++++++++++++- pandas/tests/io/test_pytables.py | 31 ++++++++++--------------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 79d6d8563a162..15b260bbf5ef5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1456,6 +1456,11 @@ def __init__(self, store, s, func, where, nrows, start=None, stop=None, stop = nrows stop = min(nrows, stop) + # Piggy-back normalized `nrows` (considering start and stop) onto + # PyTables tables object so that the GenericIndexCol constructor + # knows how large the index should be. + self.s.table._nrows_to_read = stop - start + self.nrows = nrows self.start = start self.stop = stop @@ -1816,7 +1821,16 @@ def is_indexed(self): def convert(self, values, nan_rep, encoding, errors): """ set the values from this selection: take = take ownership """ - self.values = Int64Index(np.arange(self.table.nrows)) + if hasattr(self.table, '_nrows_to_read'): + # The `_nrows_to_read` property is set on the table object by the + # code path invoked by the top-level `read_hdf()`, and calculated + # based on the start` and `stop` integer values. These values allow + # for a sub-selection and likewise the index size needs to be + # adjusted to the size of this sub-selection. + self.values = Int64Index(np.arange(self.table._nrows_to_read)) + else: + self.values = Int64Index(np.arange(self.table.nrows)) + return self def get_attr(self): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index cdc4d6b33dbc0..a4223ff606406 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5231,33 +5231,22 @@ def _compare(self, df, samples): def test_read_complete(self): path, objname, samples = self._create_simple_hdf5_file_with_pytables() - - df = pd.read_hdf(path, key=objname) - self._compare(df, samples) + self._compare(pd.read_hdf(path, key=objname), samples) def test_read_with_start(self): path, objname, samples = self._create_simple_hdf5_file_with_pytables() - - # Currently this fails as of - # https://github.com/pandas-dev/pandas/issues/11188 - with pytest.raises(ValueError, match='Shape of passed values is'): - df = pd.read_hdf(path, key=objname, start=1) - self._compare(df, samples[1:]) + # This is a regression test for pandas-dev/pandas/issues/11188 + self._compare(pd.read_hdf(path, key=objname, start=1), samples[1:]) def test_read_with_stop(self): path, objname, samples = self._create_simple_hdf5_file_with_pytables() - - # Currently this fails as of - # https://github.com/pandas-dev/pandas/issues/11188 - with pytest.raises(ValueError, match='Shape of passed values is'): - df = pd.read_hdf(path, key=objname, stop=1) - self._compare(df, samples[0:1]) + # This is a regression test for pandas-dev/pandas/issues/11188 + self._compare(pd.read_hdf(path, key=objname, stop=1), samples[0:1]) def test_read_with_startstop(self): path, objname, samples = self._create_simple_hdf5_file_with_pytables() - - # Currently this fails as of - # https://github.com/pandas-dev/pandas/issues/11188 - with pytest.raises(ValueError, match='Shape of passed values is'): - df = pd.read_hdf(path, key=objname, start=1, stop=2) - self._compare(df, samples[1:2]) + # This is a regression test for pandas-dev/pandas/issues/11188 + self._compare( + pd.read_hdf(path, key=objname, start=1, stop=2), + samples[1:2] + ) From b7a082ab7f46140bb7c32396a20ba6254806b585 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 14 Jun 2019 11:30:35 +0200 Subject: [PATCH 03/26] CLN: do not piggyback idx size on table obj --- pandas/io/pytables.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 15b260bbf5ef5..fd3e7dfcce543 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1456,11 +1456,6 @@ def __init__(self, store, s, func, where, nrows, start=None, stop=None, stop = nrows stop = min(nrows, stop) - # Piggy-back normalized `nrows` (considering start and stop) onto - # PyTables tables object so that the GenericIndexCol constructor - # knows how large the index should be. - self.s.table._nrows_to_read = stop - start - self.nrows = nrows self.start = start self.stop = stop @@ -1629,7 +1624,7 @@ def infer(self, handler): new_self.read_metadata(handler) return new_self - def convert(self, values, nan_rep, encoding, errors): + def convert(self, values, nan_rep, encoding, errors, **kwargs): """ set the values from this selection: take = take ownership """ # values is a recarray @@ -1818,18 +1813,13 @@ class GenericIndexCol(IndexCol): def is_indexed(self): return False - def convert(self, values, nan_rep, encoding, errors): + def convert(self, values, nan_rep, encoding, errors, **kwargs): """ set the values from this selection: take = take ownership """ - if hasattr(self.table, '_nrows_to_read'): - # The `_nrows_to_read` property is set on the table object by the - # code path invoked by the top-level `read_hdf()`, and calculated - # based on the start` and `stop` integer values. These values allow - # for a sub-selection and likewise the index size needs to be - # adjusted to the size of this sub-selection. - self.values = Int64Index(np.arange(self.table._nrows_to_read)) - else: - self.values = Int64Index(np.arange(self.table.nrows)) + start = kwargs.get('start', 0) + stop = kwargs.get('stop', self.table.nrows) + stop = min(stop, self.table.nrows) + self.values = Int64Index(np.arange(stop - start)) return self @@ -2173,7 +2163,7 @@ def validate_attr(self, append): raise ValueError("appended items dtype do not match existing " "items dtype in table!") - def convert(self, values, nan_rep, encoding, errors): + def convert(self, values, nan_rep, encoding, errors, **kwargs): """set the data from this selection (and convert to the correct dtype if we can) """ @@ -3445,8 +3435,10 @@ def read_axes(self, where, **kwargs): # convert the data for a in self.axes: a.set_info(self.info) + # `kwargs` may contain `start` and `stop` arguments if passed to + # `store.select()`. If set they determine the index size. a.convert(values, nan_rep=self.nan_rep, encoding=self.encoding, - errors=self.errors) + errors=self.errors, **kwargs) return True From 964cba1a47b67d02fcd1780c82d9e58d604086bd Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 12 Jun 2019 23:35:16 +0200 Subject: [PATCH 04/26] DOC: update whatsnew for #11188 --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 77b689569d57f..f695ad61ff209 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -699,6 +699,7 @@ I/O - Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`) - Improved performance in :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` when converting columns that have missing values (:issue:`25772`) - Bug in :meth:`DataFrame.to_html` where header numbers would ignore display options when rounding (:issue:`17280`) +- Bug in :func:`read_hdf` where the `start` and `stop` arguments would raise a ``ValueError`` indicating an index size mismatch (:issue:`11188`) - Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`) - Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`) - Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`) From 04b8423a297513078a25f2e3b27e1ef687a6af4c Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 14 Jun 2019 21:41:04 +0200 Subject: [PATCH 05/26] DOC: improve changelog (squash later) --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index f695ad61ff209..467cb5a40213c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -699,7 +699,7 @@ I/O - Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`) - Improved performance in :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` when converting columns that have missing values (:issue:`25772`) - Bug in :meth:`DataFrame.to_html` where header numbers would ignore display options when rounding (:issue:`17280`) -- Bug in :func:`read_hdf` where the `start` and `stop` arguments would raise a ``ValueError`` indicating an index size mismatch (:issue:`11188`) +- Bug in :func:`read_hdf` where reading a table from an HDF5 file written directly with PyTables fails with a ``ValueError`` when using a sub-selection via the ``start`` or ``stop`` arguments (:issue:`11188`) - Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`) - Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`) - Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`) From 7d200a5aaa35ff98b07a554da910fe4f54ad45e2 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 14 Jun 2019 21:52:23 +0200 Subject: [PATCH 06/26] squash: remove kwargs, add a bit of docstring --- pandas/io/pytables.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fd3e7dfcce543..cc1b4ddd4ea86 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1624,7 +1624,7 @@ def infer(self, handler): new_self.read_metadata(handler) return new_self - def convert(self, values, nan_rep, encoding, errors, **kwargs): + def convert(self, values, nan_rep, encoding, errors, start=None, stop=None): """ set the values from this selection: take = take ownership """ # values is a recarray @@ -1813,11 +1813,21 @@ class GenericIndexCol(IndexCol): def is_indexed(self): return False - def convert(self, values, nan_rep, encoding, errors, **kwargs): - """ set the values from this selection: take = take ownership """ + def convert(self, values, nan_rep, encoding, errors, start=None, stop=None): + """ set the values from this selection: take = take ownership + + Parameters + ---------- + + start : int, optional + Table row number: the start of the sub-selection. + stop : int, optional + Table row number: the end of the sub-selection. Values larger than + the underlying table's row count are normalized to that. + """ - start = kwargs.get('start', 0) - stop = kwargs.get('stop', self.table.nrows) + start = start if start is not None else 0 + stop = stop if stop is not None else self.table.nrows stop = min(stop, self.table.nrows) self.values = Int64Index(np.arange(stop - start)) @@ -2163,7 +2173,7 @@ def validate_attr(self, append): raise ValueError("appended items dtype do not match existing " "items dtype in table!") - def convert(self, values, nan_rep, encoding, errors, **kwargs): + def convert(self, values, nan_rep, encoding, errors, start=None, stop=None): """set the data from this selection (and convert to the correct dtype if we can) """ From 70e78c91ac0731eedd9f6d159828368886074e16 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 14 Jun 2019 22:29:24 +0200 Subject: [PATCH 07/26] squash: rework tests (use fixture, assert_frame_equal) --- pandas/tests/io/test_pytables.py | 102 +++++++++++++++---------------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a4223ff606406..ac4638a525fbb 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5180,73 +5180,69 @@ def test_dst_transitions(self): assert_frame_equal(result, df) -class TestReadPyTablesHDF5(Base): - """ - A group of tests which covers reading HDF5 files written by plain PyTables - (not written by pandas). - """ - - def _create_simple_hdf5_file_with_pytables(self): - - table_schema = { - 'c0': tables.Time64Col(pos=0), - 'c1': tables.StringCol(5, pos=1), - 'c2': tables.UInt32Col(pos=2), - } +@pytest.fixture(scope='module') +def pytables_hdf5_file(): + """Use PyTables to create a simple HDF5 file.""" - t0 = time.time() + table_schema = { + 'c0': tables.Time64Col(pos=0), + 'c1': tables.StringCol(5, pos=1), + 'c2': tables.Int64Col(pos=2), + } - testsamples = [ - {'c0': t0, 'c1': 'aaaaa', 'c2': 1}, - {'c0': t0 + 1, 'c1': 'bbbbb', 'c2': 2}, - {'c0': t0 + 2, 'c1': 'ccccc', 'c2': 10**5}, - {'c0': t0 + 3, 'c1': 'ddddd', 'c2': 4294967295}, - ] + t0 = time.time() - # This returns a path and does not open the file. - tmpfilepath = create_tempfile(self.path) - objectname = 'pandas_test_timeseries' + testsamples = [ + {'c0': t0, 'c1': 'aaaaa', 'c2': 1}, + {'c0': t0 + 1, 'c1': 'bbbbb', 'c2': 2}, + {'c0': t0 + 2, 'c1': 'ccccc', 'c2': 10**5}, + {'c0': t0 + 3, 'c1': 'ddddd', 'c2': 4294967295}, + ] - with tables.open_file(tmpfilepath, mode='w') as hf: - t = hf.create_table('/', name=objectname, description=table_schema) - for sample in testsamples: - for key, value in sample.items(): - t.row[key] = value - t.row.append() + # This returns a path and does not open the file. + tmpfilepath = create_tempfile('pytables_hdf5_file') + objectname = 'pandas_test_timeseries' - return tmpfilepath, objectname, testsamples + with tables.open_file(tmpfilepath, mode='w') as hf: + t = hf.create_table('/', name=objectname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() - def _compare(self, df, samples): - """Compare the reference `samples` with the contents in DataFrame `df`. - """ - for idx, row in df.iterrows(): - # Compare Time64Col values with tolerance. - tm.assert_almost_equal(samples[idx]['c0'], row['c0']) + return tmpfilepath, objectname, pd.DataFrame(testsamples) - # Compare a short string. - assert samples[idx]['c1'] == row['c1'] - # Compare an unsigned 32 bit integer. - assert samples[idx]['c2'] == row['c2'] +class TestReadPyTablesHDF5(Base): + """ + A group of tests which covers reading HDF5 files written by plain PyTables + (not written by pandas). + """ - def test_read_complete(self): - path, objname, samples = self._create_simple_hdf5_file_with_pytables() - self._compare(pd.read_hdf(path, key=objname), samples) + def test_read_complete(self, pytables_hdf5_file): + path, objname, expected_df = pytables_hdf5_file + assert_frame_equal(pd.read_hdf(path, key=objname), expected_df) - def test_read_with_start(self): - path, objname, samples = self._create_simple_hdf5_file_with_pytables() + def test_read_with_start(self, pytables_hdf5_file): + path, objname, expected_df = pytables_hdf5_file # This is a regression test for pandas-dev/pandas/issues/11188 - self._compare(pd.read_hdf(path, key=objname, start=1), samples[1:]) + assert_frame_equal( + pd.read_hdf(path, key=objname, start=1), + expected_df[1:].reset_index(drop=True) + ) - def test_read_with_stop(self): - path, objname, samples = self._create_simple_hdf5_file_with_pytables() + def test_read_with_stop(self, pytables_hdf5_file): + path, objname, expected_df = pytables_hdf5_file # This is a regression test for pandas-dev/pandas/issues/11188 - self._compare(pd.read_hdf(path, key=objname, stop=1), samples[0:1]) + assert_frame_equal( + pd.read_hdf(path, key=objname, stop=1), + expected_df[:1].reset_index(drop=True) + ) - def test_read_with_startstop(self): - path, objname, samples = self._create_simple_hdf5_file_with_pytables() + def test_read_with_startstop(self, pytables_hdf5_file): + path, objname, expected_df = pytables_hdf5_file # This is a regression test for pandas-dev/pandas/issues/11188 - self._compare( + assert_frame_equal( pd.read_hdf(path, key=objname, start=1, stop=2), - samples[1:2] + expected_df[1:2].reset_index(drop=True) ) From e7eb30a4440b6752f4a96e5a907112e142335941 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Tue, 18 Jun 2019 12:47:42 +0200 Subject: [PATCH 08/26] squash: address line length errors --- pandas/io/pytables.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index cc1b4ddd4ea86..d82a90cd47560 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1624,7 +1624,8 @@ def infer(self, handler): new_self.read_metadata(handler) return new_self - def convert(self, values, nan_rep, encoding, errors, start=None, stop=None): + def convert(self, values, nan_rep, encoding, errors, start=None, + stop=None): """ set the values from this selection: take = take ownership """ # values is a recarray @@ -1813,7 +1814,8 @@ class GenericIndexCol(IndexCol): def is_indexed(self): return False - def convert(self, values, nan_rep, encoding, errors, start=None, stop=None): + def convert(self, values, nan_rep, encoding, errors, start=None, + stop=None): """ set the values from this selection: take = take ownership Parameters @@ -2173,7 +2175,8 @@ def validate_attr(self, append): raise ValueError("appended items dtype do not match existing " "items dtype in table!") - def convert(self, values, nan_rep, encoding, errors, start=None, stop=None): + def convert(self, values, nan_rep, encoding, errors, start=None, + stop=None): """set the data from this selection (and convert to the correct dtype if we can) """ From 5b881d8c64346ced22b603e7165c7dd5b658f6c0 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Tue, 18 Jun 2019 12:52:05 +0200 Subject: [PATCH 09/26] squash: pytables.py: remove one assignment --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d82a90cd47560..5ad278ffc55c4 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1829,8 +1829,8 @@ def convert(self, values, nan_rep, encoding, errors, start=None, """ start = start if start is not None else 0 - stop = stop if stop is not None else self.table.nrows - stop = min(stop, self.table.nrows) + stop = min(stop, self.table.nrows) \ + if stop is not None else self.table.nrows self.values = Int64Index(np.arange(stop - start)) return self From 96411118c240ab5e35ae8353ad0a9fc48917ae64 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 19 Jun 2019 12:29:01 +0200 Subject: [PATCH 10/26] squash: tests: result/expected, Base, fixture scope --- pandas/tests/io/test_pytables.py | 37 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ac4638a525fbb..8a3166a6dba6a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5180,7 +5180,7 @@ def test_dst_transitions(self): assert_frame_equal(result, df) -@pytest.fixture(scope='module') +@pytest.fixture() def pytables_hdf5_file(): """Use PyTables to create a simple HDF5 file.""" @@ -5213,36 +5213,35 @@ def pytables_hdf5_file(): return tmpfilepath, objectname, pd.DataFrame(testsamples) -class TestReadPyTablesHDF5(Base): +class TestReadPyTablesHDF5: """ A group of tests which covers reading HDF5 files written by plain PyTables (not written by pandas). """ def test_read_complete(self, pytables_hdf5_file): - path, objname, expected_df = pytables_hdf5_file - assert_frame_equal(pd.read_hdf(path, key=objname), expected_df) + path, objname, df = pytables_hdf5_file + result = pd.read_hdf(path, key=objname) + expected = df + assert_frame_equal(result, expected) def test_read_with_start(self, pytables_hdf5_file): - path, objname, expected_df = pytables_hdf5_file + path, objname, df = pytables_hdf5_file # This is a regression test for pandas-dev/pandas/issues/11188 - assert_frame_equal( - pd.read_hdf(path, key=objname, start=1), - expected_df[1:].reset_index(drop=True) - ) + result = pd.read_hdf(path, key=objname, start=1) + expected = df[1:].reset_index(drop=True) + assert_frame_equal(result, expected) def test_read_with_stop(self, pytables_hdf5_file): - path, objname, expected_df = pytables_hdf5_file + path, objname, df = pytables_hdf5_file # This is a regression test for pandas-dev/pandas/issues/11188 - assert_frame_equal( - pd.read_hdf(path, key=objname, stop=1), - expected_df[:1].reset_index(drop=True) - ) + result = pd.read_hdf(path, key=objname, stop=1) + expected = df[:1].reset_index(drop=True) + assert_frame_equal(result, expected) def test_read_with_startstop(self, pytables_hdf5_file): - path, objname, expected_df = pytables_hdf5_file + path, objname, df = pytables_hdf5_file # This is a regression test for pandas-dev/pandas/issues/11188 - assert_frame_equal( - pd.read_hdf(path, key=objname, start=1, stop=2), - expected_df[1:2].reset_index(drop=True) - ) + result = pd.read_hdf(path, key=objname, start=1, stop=2) + expected = df[1:2].reset_index(drop=True) + assert_frame_equal(result, expected) From d2a9882d309f9006190db3e8245a1608a057718f Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 19 Jun 2019 12:37:31 +0200 Subject: [PATCH 11/26] CLN: s/existing/exiting --- pandas/tests/io/test_pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 8a3166a6dba6a..90da18e1d7f56 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -106,7 +106,7 @@ def ensure_clean_store(path, mode='a', complevel=None, complib=None, def ensure_clean_path(path): """ return essentially a named temporary file that is not opened - and deleted on existing; if path is a list, then create and + and deleted on exiting; if path is a list, then create and return list of filenames """ try: From aa834730f1b9192cb86da1dcf2bc6261b9110da6 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 19 Jun 2019 12:52:01 +0200 Subject: [PATCH 12/26] TST: use ensure_clean_path context manager --- pandas/tests/io/test_pytables.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 90da18e1d7f56..fd5dc870d2be3 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5199,18 +5199,18 @@ def pytables_hdf5_file(): {'c0': t0 + 3, 'c1': 'ddddd', 'c2': 4294967295}, ] - # This returns a path and does not open the file. - tmpfilepath = create_tempfile('pytables_hdf5_file') - objectname = 'pandas_test_timeseries' - - with tables.open_file(tmpfilepath, mode='w') as hf: - t = hf.create_table('/', name=objectname, description=table_schema) - for sample in testsamples: - for key, value in sample.items(): - t.row[key] = value - t.row.append() - - return tmpfilepath, objectname, pd.DataFrame(testsamples) + objname = 'pandas_test_timeseries' + + with ensure_clean_path('pytables_hdf5_file') as path: + # The `ensure_clean_path` context mgr removes the temp file upon exit. + with tables.open_file(path, mode='w') as f: + t = f.create_table('/', name=objname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() + + yield path, objname, pd.DataFrame(testsamples) class TestReadPyTablesHDF5: From 065054f5f635155f223429454ef9414b3354254b Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 19 Jun 2019 13:13:43 +0200 Subject: [PATCH 13/26] squash: do not use line cont backslash --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5ad278ffc55c4..fec4ca447f667 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1829,8 +1829,8 @@ def convert(self, values, nan_rep, encoding, errors, start=None, """ start = start if start is not None else 0 - stop = min(stop, self.table.nrows) \ - if stop is not None else self.table.nrows + stop = (min(stop, self.table.nrows) + if stop is not None else self.table.nrows) self.values = Int64Index(np.arange(stop - start)) return self From 9cc85ce75daac2c70f33234717c4da47b4b7f972 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 19 Jun 2019 13:32:56 +0200 Subject: [PATCH 14/26] squash: pass start/stop explicitly into a.convert() --- pandas/io/pytables.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fec4ca447f667..f7ae1db90551f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3451,7 +3451,8 @@ def read_axes(self, where, **kwargs): # `kwargs` may contain `start` and `stop` arguments if passed to # `store.select()`. If set they determine the index size. a.convert(values, nan_rep=self.nan_rep, encoding=self.encoding, - errors=self.errors, **kwargs) + errors=self.errors, start=kwargs.get('start'), + stop=kwargs.get('stop')) return True From cd69c0b414cc80a8ce579704eefcf7233cf1125d Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 19 Jun 2019 13:46:40 +0200 Subject: [PATCH 15/26] DOC: add some docstring --- pandas/io/pytables.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f7ae1db90551f..a43433190a501 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1821,6 +1821,14 @@ def convert(self, values, nan_rep, encoding, errors, start=None, Parameters ---------- + values : + Expected to be passed but ignored in this implementation. + nan_rep : + Expected to be passed but ignored in this implementation. + encoding: + Expected to be passed but ignored in this implementation. + errors: + Expected to be passed but ignored in this implementation. start : int, optional Table row number: the start of the sub-selection. stop : int, optional From a9c6f15ccc2d9bcd9f4e8c26f95d4c857b858b7c Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Wed, 19 Jun 2019 14:16:19 +0200 Subject: [PATCH 16/26] squash: flake8 error --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a43433190a501..673c91d7ff7ff 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1838,7 +1838,7 @@ def convert(self, values, nan_rep, encoding, errors, start=None, start = start if start is not None else 0 stop = (min(stop, self.table.nrows) - if stop is not None else self.table.nrows) + if stop is not None else self.table.nrows) self.values = Int64Index(np.arange(stop - start)) return self From 8adf459d6a5e6ecb6056745226fd42385632b478 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 10:30:32 +0200 Subject: [PATCH 17/26] squash: change docstrings one more time --- pandas/io/pytables.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 673c91d7ff7ff..17d580bae5cf1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1821,14 +1821,10 @@ def convert(self, values, nan_rep, encoding, errors, start=None, Parameters ---------- - values : - Expected to be passed but ignored in this implementation. - nan_rep : - Expected to be passed but ignored in this implementation. - encoding: - Expected to be passed but ignored in this implementation. - errors: - Expected to be passed but ignored in this implementation. + values : np.ndarray + nan_rep : str + encoding : str + errors : str start : int, optional Table row number: the start of the sub-selection. stop : int, optional From aed78ffc2c7fdcd5020c2770b08051af1014253e Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 10:33:54 +0200 Subject: [PATCH 18/26] squash: tests: cleanup --- pandas/tests/io/test_pytables.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index fd5dc870d2be3..2b97e6c2b661e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5,7 +5,6 @@ from io import BytesIO import os import tempfile -import time from warnings import catch_warnings, simplefilter import numpy as np @@ -5180,7 +5179,7 @@ def test_dst_transitions(self): assert_frame_equal(result, df) -@pytest.fixture() +@pytest.fixture def pytables_hdf5_file(): """Use PyTables to create a simple HDF5 file.""" @@ -5190,7 +5189,7 @@ def pytables_hdf5_file(): 'c2': tables.Int64Col(pos=2), } - t0 = time.time() + t0 = 1561105000.0 testsamples = [ {'c0': t0, 'c1': 'aaaaa', 'c2': 1}, From 53dba1a06a279c2bec8774ba1daa7ae6aac10132 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 11:25:07 +0200 Subject: [PATCH 19/26] TST: move tests to io/pytables/test_compat.py --- pandas/tests/io/pytables/test_compat.py | 105 ++++++++++++++++++++++++ pandas/tests/io/test_pytables.py | 67 --------------- 2 files changed, 105 insertions(+), 67 deletions(-) create mode 100644 pandas/tests/io/pytables/test_compat.py diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py new file mode 100644 index 0000000000000..ba5878802b032 --- /dev/null +++ b/pandas/tests/io/pytables/test_compat.py @@ -0,0 +1,105 @@ +import os +import tempfile +from contextlib import contextmanager + +import pytest + +import pandas as pd + +from pandas.util.testing import assert_frame_equal + + +tables = pytest.importorskip('tables') + + +def safe_remove(path): + if path is not None: + try: + os.remove(path) + except OSError: + pass + + +def create_tempfile(path): + return os.path.join(tempfile.gettempdir(), path) + + +@contextmanager +def ensure_clean_path(path): + try: + if isinstance(path, list): + filenames = [create_tempfile(p) for p in path] + yield filenames + else: + filenames = [create_tempfile(path)] + yield filenames[0] + finally: + for f in filenames: + safe_remove(f) + + +@pytest.fixture +def pytables_hdf5_file(): + """Use PyTables to create a simple HDF5 file.""" + + table_schema = { + 'c0': tables.Time64Col(pos=0), + 'c1': tables.StringCol(5, pos=1), + 'c2': tables.Int64Col(pos=2), + } + + t0 = 1561105000.0 + + testsamples = [ + {'c0': t0, 'c1': 'aaaaa', 'c2': 1}, + {'c0': t0 + 1, 'c1': 'bbbbb', 'c2': 2}, + {'c0': t0 + 2, 'c1': 'ccccc', 'c2': 10**5}, + {'c0': t0 + 3, 'c1': 'ddddd', 'c2': 4294967295}, + ] + + objname = 'pandas_test_timeseries' + + with ensure_clean_path('pytables_hdf5_file') as path: + # The `ensure_clean_path` context mgr removes the temp file upon exit. + with tables.open_file(path, mode='w') as f: + t = f.create_table('/', name=objname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() + + yield path, objname, pd.DataFrame(testsamples) + + +class TestReadPyTablesHDF5: + """ + A group of tests which covers reading HDF5 files written by plain PyTables + (not written by pandas). + """ + + def test_read_complete(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + result = pd.read_hdf(path, key=objname) + expected = df + assert_frame_equal(result, expected) + + def test_read_with_start(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, start=1) + expected = df[1:].reset_index(drop=True) + assert_frame_equal(result, expected) + + def test_read_with_stop(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, stop=1) + expected = df[:1].reset_index(drop=True) + assert_frame_equal(result, expected) + + def test_read_with_startstop(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, start=1, stop=2) + expected = df[1:2].reset_index(drop=True) + assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2b97e6c2b661e..ef9dbc63d873d 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5177,70 +5177,3 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) - - -@pytest.fixture -def pytables_hdf5_file(): - """Use PyTables to create a simple HDF5 file.""" - - table_schema = { - 'c0': tables.Time64Col(pos=0), - 'c1': tables.StringCol(5, pos=1), - 'c2': tables.Int64Col(pos=2), - } - - t0 = 1561105000.0 - - testsamples = [ - {'c0': t0, 'c1': 'aaaaa', 'c2': 1}, - {'c0': t0 + 1, 'c1': 'bbbbb', 'c2': 2}, - {'c0': t0 + 2, 'c1': 'ccccc', 'c2': 10**5}, - {'c0': t0 + 3, 'c1': 'ddddd', 'c2': 4294967295}, - ] - - objname = 'pandas_test_timeseries' - - with ensure_clean_path('pytables_hdf5_file') as path: - # The `ensure_clean_path` context mgr removes the temp file upon exit. - with tables.open_file(path, mode='w') as f: - t = f.create_table('/', name=objname, description=table_schema) - for sample in testsamples: - for key, value in sample.items(): - t.row[key] = value - t.row.append() - - yield path, objname, pd.DataFrame(testsamples) - - -class TestReadPyTablesHDF5: - """ - A group of tests which covers reading HDF5 files written by plain PyTables - (not written by pandas). - """ - - def test_read_complete(self, pytables_hdf5_file): - path, objname, df = pytables_hdf5_file - result = pd.read_hdf(path, key=objname) - expected = df - assert_frame_equal(result, expected) - - def test_read_with_start(self, pytables_hdf5_file): - path, objname, df = pytables_hdf5_file - # This is a regression test for pandas-dev/pandas/issues/11188 - result = pd.read_hdf(path, key=objname, start=1) - expected = df[1:].reset_index(drop=True) - assert_frame_equal(result, expected) - - def test_read_with_stop(self, pytables_hdf5_file): - path, objname, df = pytables_hdf5_file - # This is a regression test for pandas-dev/pandas/issues/11188 - result = pd.read_hdf(path, key=objname, stop=1) - expected = df[:1].reset_index(drop=True) - assert_frame_equal(result, expected) - - def test_read_with_startstop(self, pytables_hdf5_file): - path, objname, df = pytables_hdf5_file - # This is a regression test for pandas-dev/pandas/issues/11188 - result = pd.read_hdf(path, key=objname, start=1, stop=2) - expected = df[1:2].reset_index(drop=True) - assert_frame_equal(result, expected) From dfec26ef2abf0cae8f0ab1582f13c11b9d1dcf92 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 11:41:12 +0200 Subject: [PATCH 20/26] TST: use pytest fixture instead of ensure_clean_path --- pandas/tests/io/pytables/test_compat.py | 54 ++++++++----------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index ba5878802b032..2e456ecb384fd 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -12,35 +12,13 @@ tables = pytest.importorskip('tables') -def safe_remove(path): - if path is not None: - try: - os.remove(path) - except OSError: - pass - - -def create_tempfile(path): - return os.path.join(tempfile.gettempdir(), path) - - -@contextmanager -def ensure_clean_path(path): - try: - if isinstance(path, list): - filenames = [create_tempfile(p) for p in path] - yield filenames - else: - filenames = [create_tempfile(path)] - yield filenames[0] - finally: - for f in filenames: - safe_remove(f) - - @pytest.fixture -def pytables_hdf5_file(): - """Use PyTables to create a simple HDF5 file.""" +def pytables_hdf5_file(tmp_path): + """Use PyTables to create a simple HDF5 file. + + There is no need for temporary file cleanup, pytest makes sure that there is + no collision between tests and that storage needs to not grow indefinitely. + """ table_schema = { 'c0': tables.Time64Col(pos=0), @@ -59,16 +37,18 @@ def pytables_hdf5_file(): objname = 'pandas_test_timeseries' - with ensure_clean_path('pytables_hdf5_file') as path: - # The `ensure_clean_path` context mgr removes the temp file upon exit. - with tables.open_file(path, mode='w') as f: - t = f.create_table('/', name=objname, description=table_schema) - for sample in testsamples: - for key, value in sample.items(): - t.row[key] = value - t.row.append() + # The `tmp_path` fixture provides a temporary directory unique to the + # individual test invocation. Create a file in there. + h5path = tmp_path / 'written_with_pytables.h5' + + with tables.open_file(h5path, mode='w') as f: + t = f.create_table('/', name=objname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() - yield path, objname, pd.DataFrame(testsamples) + return h5path, objname, pd.DataFrame(testsamples) class TestReadPyTablesHDF5: From b9421af7a9da997e327632fb0e7d7b7df0c963bd Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 13:27:28 +0200 Subject: [PATCH 21/26] squash: address linter errors --- pandas/tests/io/pytables/test_compat.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index 2e456ecb384fd..075188382720d 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -1,7 +1,3 @@ -import os -import tempfile -from contextlib import contextmanager - import pytest import pandas as pd @@ -16,8 +12,9 @@ def pytables_hdf5_file(tmp_path): """Use PyTables to create a simple HDF5 file. - There is no need for temporary file cleanup, pytest makes sure that there is - no collision between tests and that storage needs to not grow indefinitely. + There is no need for temporary file cleanup, pytest makes sure that there + is no collision between tests and that storage needs to not grow + indefinitely. """ table_schema = { From 9c0e96b0b99646bbbc220fd1faec73fdd2baa367 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 13:34:05 +0200 Subject: [PATCH 22/26] squash: pass path as text --- pandas/tests/io/pytables/test_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index 075188382720d..b2eddfe924533 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -38,7 +38,7 @@ def pytables_hdf5_file(tmp_path): # individual test invocation. Create a file in there. h5path = tmp_path / 'written_with_pytables.h5' - with tables.open_file(h5path, mode='w') as f: + with tables.open_file(str(h5path), mode='w') as f: t = f.create_table('/', name=objname, description=table_schema) for sample in testsamples: for key, value in sample.items(): From 79bed6a7b74233d4010569e8f16aae812fb5511f Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 13:57:50 +0200 Subject: [PATCH 23/26] squash: tests: improve code comment --- pandas/tests/io/pytables/test_compat.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index b2eddfe924533..c8e5b76cafa2c 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -12,9 +12,11 @@ def pytables_hdf5_file(tmp_path): """Use PyTables to create a simple HDF5 file. - There is no need for temporary file cleanup, pytest makes sure that there - is no collision between tests and that storage needs to not grow - indefinitely. + There is no need for temporary file cleanup: pytest's `tmp_path` fixture + guarantees file system isolation between tests, retains files for later + inspection and then removes them in a rolling fashion so that the storage + space consumption does not grow indefinitely with the number of test runner + invocations. """ table_schema = { From bca6ee64b23625adb60c6a294708d5274936fada Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 14:42:45 +0200 Subject: [PATCH 24/26] squash: use ensure_clean_path again --- pandas/tests/io/pytables/test_compat.py | 32 ++++++++++--------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index c8e5b76cafa2c..abe7987c581ba 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -1,23 +1,17 @@ import pytest import pandas as pd - from pandas.util.testing import assert_frame_equal +from pandas.tests.io.test_pytables import ensure_clean_path + tables = pytest.importorskip('tables') @pytest.fixture def pytables_hdf5_file(tmp_path): - """Use PyTables to create a simple HDF5 file. - - There is no need for temporary file cleanup: pytest's `tmp_path` fixture - guarantees file system isolation between tests, retains files for later - inspection and then removes them in a rolling fashion so that the storage - space consumption does not grow indefinitely with the number of test runner - invocations. - """ + """Use PyTables to create a simple HDF5 file.""" table_schema = { 'c0': tables.Time64Col(pos=0), @@ -36,18 +30,16 @@ def pytables_hdf5_file(tmp_path): objname = 'pandas_test_timeseries' - # The `tmp_path` fixture provides a temporary directory unique to the - # individual test invocation. Create a file in there. - h5path = tmp_path / 'written_with_pytables.h5' - - with tables.open_file(str(h5path), mode='w') as f: - t = f.create_table('/', name=objname, description=table_schema) - for sample in testsamples: - for key, value in sample.items(): - t.row[key] = value - t.row.append() + with ensure_clean_path('written_with_pytables.h5') as path: + # The `ensure_clean_path` context mgr removes the temp file upon exit. + with tables.open_file(path, mode='w') as f: + t = f.create_table('/', name=objname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() - return h5path, objname, pd.DataFrame(testsamples) + yield path, objname, pd.DataFrame(testsamples) class TestReadPyTablesHDF5: From 60d37e02ab40ab907f517bd5bb16fc7bb2d5c2b0 Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 14:55:01 +0200 Subject: [PATCH 25/26] squash: tests: cleanup --- pandas/tests/io/pytables/test_compat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index abe7987c581ba..10c264dddca2c 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -10,7 +10,7 @@ @pytest.fixture -def pytables_hdf5_file(tmp_path): +def pytables_hdf5_file(): """Use PyTables to create a simple HDF5 file.""" table_schema = { @@ -46,6 +46,8 @@ class TestReadPyTablesHDF5: """ A group of tests which covers reading HDF5 files written by plain PyTables (not written by pandas). + + Was introduced for regression-testing issue 11188. """ def test_read_complete(self, pytables_hdf5_file): From 1ce1a7005d6d172eb7af9b7bd435da0312fd4dbe Mon Sep 17 00:00:00 2001 From: Jan-Philip Gehrcke Date: Fri, 21 Jun 2019 15:33:18 +0200 Subject: [PATCH 26/26] squash: tests: fix isort error --- pandas/tests/io/pytables/test_compat.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py index 10c264dddca2c..d74e1218ebdb0 100644 --- a/pandas/tests/io/pytables/test_compat.py +++ b/pandas/tests/io/pytables/test_compat.py @@ -1,10 +1,8 @@ import pytest import pandas as pd -from pandas.util.testing import assert_frame_equal - from pandas.tests.io.test_pytables import ensure_clean_path - +from pandas.util.testing import assert_frame_equal tables = pytest.importorskip('tables')