From f9ad2e8e68d2d69ba6e6539631cbca9367957c44 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 30 Nov 2019 11:25:32 -0800
Subject: [PATCH 1/5] annotate to_hdf

---
 pandas/core/generic.py | 6 ++++--
 pandas/io/pytables.py  | 9 +++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b13aee238efb3..7f7c524bbb144 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2423,7 +2423,9 @@ def to_json(
             indent=indent,
         )

-    def to_hdf(self, path_or_buf, key, **kwargs):
+    def to_hdf(
+        self, path_or_buf, key: str, mode: str = "a", append: bool_t = False, **kwargs
+    ):
         """
         Write the contained data to an HDF5 file using HDFStore.

@@ -2526,7 +2528,7 @@ def to_hdf(
         """
         from pandas.io import pytables

-        pytables.to_hdf(path_or_buf, key, self, **kwargs)
+        pytables.to_hdf(path_or_buf, key, self, mode=mode, append=append, **kwargs)

     def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs):
         """
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 29835a9bd0c00..d42cdc68eb68c 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -54,6 +54,7 @@

 if TYPE_CHECKING:
     from tables import File, Node  # noqa:F401
+    from pandas.core.generic import NDFrame  # noqa:F401


 # versioning attribute
@@ -241,12 +242,12 @@ def _tables():

 def to_hdf(
     path_or_buf,
-    key,
-    value,
-    mode=None,
+    key: str,
+    value: "NDFrame",
+    mode: str = "a",
     complevel: Optional[int] = None,
     complib=None,
-    append=None,
+    append: bool = False,
     **kwargs,
 ):
     """ store this object, close it if we opened it """

From 7b978c7b13ebbb35141a919e6038b8acfc1336f8 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 30 Nov 2019 11:35:01 -0800
Subject: [PATCH 2/5] CLN: flesh out to_hdf signature

---
 pandas/core/generic.py | 62 ++++++++++++++++++++++++++++--------------
 pandas/io/pytables.py  | 10 +++++--
 2 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7f7c524bbb144..2a9d1fe593ab3 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2424,7 +2424,16 @@ def to_json(
         )

     def to_hdf(
-        self, path_or_buf, key: str, mode: str = "a", append: bool_t = False, **kwargs
+        self,
+        path_or_buf,
+        key: str,
+        mode: str = "a",
+        complevel: Optional[int] = None,
+        complib: Optional[str] = None,
+        append: bool_t = False,
+        format: Optional[str] = None,
+        errors: str = "strict",
+        **kwargs,
     ):
         """
         Write the contained data to an HDF5 file using HDFStore.
@@ -2453,7 +2462,20 @@ def to_hdf(
             - 'a': append, an existing file is opened for reading and
               writing, and if the file does not exist it is created.
             - 'r+': similar to 'a', but the file must already exist.
-        format : {'fixed', 'table'}, default 'fixed'
+        complevel : {0-9}, optional
+            Specifies a compression level for data.
+            A value of 0 disables compression.
+        complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
+            Specifies the compression library to be used.
+            As of v0.20.2 these additional compressors for Blosc are supported
+            (default if no compressor specified: 'blosc:blosclz'):
+            {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
+            'blosc:zlib', 'blosc:zstd'}.
+            Specifying a compression library which is not available issues
+            a ValueError.
+        append : bool, default False
+            For Table formats, append the input data to the existing.
+        format : {'fixed', 'table', None}, default 'fixed'
             Possible values:

             - 'fixed': Fixed format. Fast writing/reading. Not-appendable,
@@ -2461,32 +2483,21 @@ def to_hdf(
             - 'table': Table format. Write as a PyTables Table structure
               which may perform worse but allow more flexible operations
               like searching / selecting subsets of the data.
-        append : bool, default False
-            For Table formats, append the input data to the existing.
+            - If None, pd.get_option('io.hdf.default_format') is checked,
+              followed by fallback to "fixed"
+        errors : str, default 'strict'
+            Specifies how encoding and decoding errors are to be handled.
+            See the errors argument for :func:`open` for a full list
+            of options.
         data_columns : list of columns or True, optional
             List of columns to create as indexed data columns for on-disk
             queries, or True to use all columns. By default only the axes
             of the object are indexed. See :ref:`io.hdf5-query-data-columns`.
             Applicable only to format='table'.
-        complevel : {0-9}, optional
-            Specifies a compression level for data.
-            A value of 0 disables compression.
-        complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
-            Specifies the compression library to be used.
-            As of v0.20.2 these additional compressors for Blosc are supported
-            (default if no compressor specified: 'blosc:blosclz'):
-            {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
-            'blosc:zlib', 'blosc:zstd'}.
-            Specifying a compression library which is not available issues
-            a ValueError.
         fletcher32 : bool, default False
             If applying compression use the fletcher32 checksum.
         dropna : bool, default False
             If true, ALL nan rows will not be written to store.
-        errors : str, default 'strict'
-            Specifies how encoding and decoding errors are to be handled.
-            See the errors argument for :func:`open` for a full list
-            of options.

         See Also
         --------
@@ -2528,7 +2539,18 @@ def to_hdf(
         """
         from pandas.io import pytables

-        pytables.to_hdf(path_or_buf, key, self, mode=mode, append=append, **kwargs)
+        pytables.to_hdf(
+            path_or_buf,
+            key,
+            self,
+            mode=mode,
+            complevel=complevel,
+            complib=complib,
+            append=append,
+            format=format,
+            errors=errors,
+            **kwargs,
+        )

     def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs):
         """
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d42cdc68eb68c..7a9b91feff09b 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -246,16 +246,20 @@ def to_hdf(
     value: "NDFrame",
     mode: str = "a",
     complevel: Optional[int] = None,
-    complib=None,
+    complib: Optional[str] = None,
     append: bool = False,
+    format: Optional[str] = None,
+    errors: str = "strict",
     **kwargs,
 ):
     """ store this object, close it if we opened it """

     if append:
-        f = lambda store: store.append(key, value, **kwargs)
+        f = lambda store: store.append(
+            key, value, format=format, errors=errors, **kwargs
+        )
     else:
-        f = lambda store: store.put(key, value, **kwargs)
+        f = lambda store: store.put(key, value, format=format, errors=errors, **kwargs)

     path_or_buf = _stringify_path(path_or_buf)
     if isinstance(path_or_buf, str):

From 67fe1179a7b31e090359e5bd9bf0f137c8c003f5 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 30 Nov 2019 11:40:43 -0800
Subject: [PATCH 3/5] encoding kwarg

---
 pandas/core/generic.py |  2 ++
 pandas/io/pytables.py  | 11 +++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 2a9d1fe593ab3..c9e6ef20c0fbd 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2433,6 +2433,7 @@ def to_hdf(
         append: bool_t = False,
         format: Optional[str] = None,
         errors: str = "strict",
+        encoding: str = "UTF-8",
         **kwargs,
     ):
         """
@@ -2549,6 +2550,7 @@ def to_hdf(
             append=append,
             format=format,
             errors=errors,
+            encoding=encoding,
             **kwargs,
         )

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 7a9b91feff09b..443c4b8f479b1 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -250,16 +250,19 @@ def to_hdf(
     append: bool = False,
     format: Optional[str] = None,
     errors: str = "strict",
+    encoding: str = "UTF-8",
     **kwargs,
 ):
     """ store this object, close it if we opened it """

     if append:
         f = lambda store: store.append(
-            key, value, format=format, errors=errors, **kwargs
+            key, value, format=format, errors=errors, encoding=encoding, **kwargs
         )
     else:
-        f = lambda store: store.put(key, value, format=format, errors=errors, **kwargs)
+        f = lambda store: store.put(
+            key, value, format=format, errors=errors, encoding=encoding, **kwargs
+        )

     path_or_buf = _stringify_path(path_or_buf)
     if isinstance(path_or_buf, str):
@@ -1037,7 +1040,7 @@ def append(
         format=None,
         append=True,
         columns=None,
-        dropna=None,
+        dropna: Optional[bool] = None,
         **kwargs,
     ):
         """
@@ -1065,7 +1068,7 @@ def append(
         chunksize : size to chunk the writing
         expectedrows : expected TOTAL row size of this table
         encoding : default None, provide an encoding for strings
-        dropna : bool, default False
+        dropna : bool, default False
             Do not write an ALL nan row to the store settable
             by the option 'io.hdf.dropna_table'.

From 9e9bbe5ed9015c35eae889a71d32b8c093703a19 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 30 Nov 2019 11:41:12 -0800
Subject: [PATCH 4/5] docstring

---
 pandas/core/generic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c9e6ef20c0fbd..4ca28a901d2ed 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2490,6 +2490,7 @@ def to_hdf(
             Specifies how encoding and decoding errors are to be handled.
             See the errors argument for :func:`open` for a full list
             of options.
+        encoding : str, default "UTF-8"
         data_columns : list of columns or True, optional
             List of columns to create as indexed data columns for on-disk
             queries, or True to use all columns. By default only the axes

From 897a460b3d8087fceb693754bcd680c2298b18c1 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 1 Dec 2019 10:25:12 -0800
Subject: [PATCH 5/5] NDFrame -> FrameOrSeries

---
 pandas/io/pytables.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 443c4b8f479b1..13e318873bf58 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -43,6 +43,7 @@
     concat,
     isna,
 )
+from pandas._typing import FrameOrSeries
 from pandas.core.arrays.categorical import Categorical
 import pandas.core.common as com
 from pandas.core.computation.pytables import PyTablesExpr, maybe_expression
@@ -54,7 +55,6 @@

 if TYPE_CHECKING:
     from tables import File, Node  # noqa:F401
-    from pandas.core.generic import NDFrame  # noqa:F401


 # versioning attribute
@@ -243,7 +243,7 @@ def _tables():
 def to_hdf(
     path_or_buf,
     key: str,
-    value: "NDFrame",
+    value: FrameOrSeries,
     mode: str = "a",
     complevel: Optional[int] = None,
     complib: Optional[str] = None,
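
Usage sketch: a minimal example of the keywords made explicit by this series, flowing from DataFrame.to_hdf through pytables.to_hdf and on to HDFStore.put/append. This is illustrative only; the file name "example_store.h5" is hypothetical, and it assumes the optional PyTables dependency ("tables") is installed.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": np.arange(5), "b": list("abcde")})

    # First write takes the put() path (append=False). format="table" is chosen
    # so that a later append is allowed; complevel/complib enable compression.
    df.to_hdf(
        "example_store.h5",
        key="df",
        mode="a",
        format="table",
        complevel=5,
        complib="blosc",
    )

    # A second write with append=True dispatches to HDFStore.append(), with
    # format/errors/encoding now forwarded explicitly rather than via **kwargs.
    df.to_hdf("example_store.h5", key="df", append=True, format="table")

    result = pd.read_hdf("example_store.h5", key="df")  # 10 rows after the append

Because the new keyword defaults match what the downstream calls already assumed (mode "a", append False), a call like the one above should behave the same before and after the series; the change is about typing and an introspectable signature rather than behavior.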