From 2e77cef7dadd9607871f486371bb395751e293f3 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Mon, 8 Jun 2020 10:08:49 -0500 Subject: [PATCH 1/7] TST: groupby apply with indexing and column aggregation returns the column (#7002) --- pandas/tests/groupby/test_apply.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index bc8067212d60e..ec454f1b3e93f 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -961,3 +961,13 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) + + +def test_apply_function_with_indexing_return_column(): + # GH: 7002 + df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], + 'foo2': np.random.randn(6)}) + result = df.groupby('foo1', as_index=False).apply(lambda x: x.mean()) + expected = df.groupby('foo1', as_index=False).mean() + tm.assert_frame_equal(result, expected) + assert 'foo1' in result.columns From ce4db3c1b7c168a2062580fedfeb7f4e5db22050 Mon Sep 17 00:00:00 2001 From: VirosaLi Date: Sun, 14 Jun 2020 14:16:15 -0500 Subject: [PATCH 2/7] Revert "TST: groupby apply with indexing and column aggregation returns the column (#7002)" This reverts commit 2e77cef7dadd9607871f486371bb395751e293f3. 
--- pandas/tests/groupby/test_apply.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index ec454f1b3e93f..bc8067212d60e 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -961,13 +961,3 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) - - -def test_apply_function_with_indexing_return_column(): - # GH: 7002 - df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], - 'foo2': np.random.randn(6)}) - result = df.groupby('foo1', as_index=False).apply(lambda x: x.mean()) - expected = df.groupby('foo1', as_index=False).mean() - tm.assert_frame_equal(result, expected) - assert 'foo1' in result.columns From cacf138e204f47abda45a7ffe3270a2c3a5f6c27 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Mon, 22 Jun 2020 15:56:00 -0500 Subject: [PATCH 3/7] BUG: min_itemsize bug in pytable's append_to_multiple (#11238) --- pandas/io/pytables.py | 12 +++++++++++- pandas/tests/io/pytables/test_store.py | 27 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 800e9474cc0f8..cce0053620569 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1303,6 +1303,8 @@ def append_to_multiple( valid_index = valid_index.intersection(index) value = value.loc[valid_index] + min_itemsize = kwargs.pop("min_itemsize", None) + # append for k, v in d.items(): dc = data_columns if k == selector else None @@ -1310,7 +1312,15 @@ def append_to_multiple( # compute the val val = value.reindex(v, axis=axis) - self.append(k, val, data_columns=dc, **kwargs) + if min_itemsize is None: + self.append(k, val, data_columns=dc, **kwargs) + else: + min_itemsize = { + key: value for (key, value) in min_itemsize.items() if key in v + } + self.append( + k, val, data_columns=dc, min_itemsize=min_itemsize, **kwargs + ) def create_table_index( self, diff --git 
a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 524e9f41a7731..1e4fe4bfd65fa 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -3697,6 +3697,33 @@ def test_append_to_multiple_dropna_false(self, setup_path): assert not store.select("df1a").index.equals(store.select("df2a").index) + def test_append_to_multiple_min_itemsize(self, setup_path): + # GH 11238 + df = pd.DataFrame( + { + "IX": np.arange(1, 21), + "Num": np.arange(1, 21), + "BigNum": np.arange(1, 21) * 88, + "Str": ["a" for _ in range(20)], + "LongStr": ["abcde" for _ in range(20)], + } + ) + expected = df.iloc[[0]] + + with ensure_clean_store(setup_path) as store: + store.append_to_multiple( + { + "index": ["IX"], + "nums": ["Num", "BigNum"], + "strs": ["Str", "LongStr"], + }, + df.iloc[[0]], + "index", + min_itemsize={"Str": 10, "LongStr": 100}, + ) + result = store.select_as_multiple(["index", "nums", "strs"]) + tm.assert_frame_equal(result, expected) + def test_select_as_multiple(self, setup_path): df1 = tm.makeTimeDataFrame() From 8648896b4c333b3c273b4a9bba69bfcc9f535ff3 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 23 Jun 2020 10:59:27 -0500 Subject: [PATCH 4/7] whatsnews update (#11238) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/io/pytables.py | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9d9d809a295ea..70fc67a0e5eb8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1016,6 +1016,7 @@ I/O - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`) - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. 
(:issue:`31544`) - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) +- Bug in :meth:`HDFStore.append_to_multiple` was raising an ``ValueError`` when `min_itemsize` parameter is set (:issue:`11238`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index cce0053620569..db6bed76e03ef 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1312,15 +1312,11 @@ def append_to_multiple( # compute the val val = value.reindex(v, axis=axis) - if min_itemsize is None: - self.append(k, val, data_columns=dc, **kwargs) - else: + if min_itemsize is not None: min_itemsize = { key: value for (key, value) in min_itemsize.items() if key in v } - self.append( - k, val, data_columns=dc, min_itemsize=min_itemsize, **kwargs - ) + self.append(k, val, data_columns=dc, min_itemsize=min_itemsize, **kwargs) def create_table_index( self, From a0ee2ab8cd83b1ca91515de9a4dc64e26f6fe6ca Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 23 Jun 2020 13:38:46 -0500 Subject: [PATCH 5/7] test update (#11238) --- pandas/io/pytables.py | 11 ++++++----- pandas/tests/io/pytables/test_store.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index db6bed76e03ef..0e5d7b007bd89 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1312,11 +1312,12 @@ def append_to_multiple( # compute the val val = value.reindex(v, axis=axis) - if min_itemsize is not None: - min_itemsize = { - key: value for (key, value) in min_itemsize.items() if key in v - } - self.append(k, val, data_columns=dc, min_itemsize=min_itemsize, **kwargs) + filtered = ( + {key: value for (key, value) in min_itemsize.items() if key in v} + if min_itemsize is not None + else None + ) + self.append(k, val, data_columns=dc, min_itemsize=filtered, **kwargs) def create_table_index( self, diff --git a/pandas/tests/io/pytables/test_store.py 
b/pandas/tests/io/pytables/test_store.py index 1e4fe4bfd65fa..c69992471fc9b 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -3719,7 +3719,7 @@ def test_append_to_multiple_min_itemsize(self, setup_path): }, df.iloc[[0]], "index", - min_itemsize={"Str": 10, "LongStr": 100}, + min_itemsize={"Str": 10, "LongStr": 100, "Num": 2}, ) result = store.select_as_multiple(["index", "nums", "strs"]) tm.assert_frame_equal(result, expected) From 876bc804368ec1648e7349bf095281cd45e7db3e Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Wed, 24 Jun 2020 17:43:24 -0500 Subject: [PATCH 6/7] whatsnew 1.1 --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 7c9fa53568f45..579136ab6e07a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1020,6 +1020,7 @@ I/O - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. 
(:issue:`31544`) - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) - Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`) +- Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the min_itemsize parameter is set (:issue:`11238`) Plotting ^^^^^^^^ From d2fd46220aa7adda9b4b8faacf559ccf207eb2a6 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Wed, 24 Jun 2020 21:00:10 -0500 Subject: [PATCH 7/7] whatsnew 1.1 --- doc/source/whatsnew/v1.1.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 72dfe1cdfa1b2..6eb61f14d5629 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1029,7 +1029,6 @@ I/O - Bug in :meth:`ujson.encode` was raising an `OverflowError` with numbers larger than sys.maxsize (:issue: `34395`) - Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the min_itemsize parameter is set (:issue:`11238`) - Plotting ^^^^^^^^