From f3a2e9286256b87a72dc642a270808f90a8181ce Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Wed, 3 Jul 2019 01:24:02 -0400
Subject: [PATCH 01/15] addressed issue #27160

---
 pandas/core/generic.py |  6 +++---
 pandas/io/packers.py   | 15 ++++++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 822428c6787be..466a04c2a0401 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2415,7 +2415,7 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
         Parameters
         ----------
         path : string File path, buffer-like, or None
-            if None, return generated string
+            if None, return generated bytes
         append : bool whether to append to an existing msgpack
             (default is False)
         compress : type of compressor (zlib or blosc), default to None (no
@@ -2423,9 +2423,9 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
             compression)
 
         Returns
         -------
-        None or str
+        None or bytes
             If path_or_buf is None, returns the resulting msgpack format as a
-            string. Otherwise returns None.
+            byte string. Otherwise returns None.
         """
         from pandas.io import packers

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 30e51e62aa764..04013134cc90c 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -87,7 +87,7 @@ def to_msgpack(path_or_buf, *args, **kwargs):
     Parameters
     ----------
     path_or_buf : string File path, buffer-like, or None
-        if None, return generated string
+        if None, return generated bytes
     args : an object or objects to serialize
     encoding : encoding for unicode objects
     append : boolean whether to append to an existing msgpack
@@ -115,8 +115,17 @@ def writer(fh):
     path_or_buf = _stringify_path(path_or_buf)
     if isinstance(path_or_buf, str):
-        with open(path_or_buf, mode) as fh:
-            writer(fh)
+        try:
+            exists = os.path.exists(os.path.dirname(path_or_buf))
+        except (TypeError, ValueError):
+            exists = False
+
+        if exists:
+            with open(path_or_buf, mode) as fh:
+                writer(fh)
+        else:
+            raise ValueError('path_or_buf is invalid or was not found')
+
     elif path_or_buf is None:
         buf = BytesIO()
         writer(buf)

From c5bf2c1edf449b774211758062f50a5f8f144737 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Wed, 3 Jul 2019 09:07:13 -0400
Subject: [PATCH 02/15] fixed issue #27160 for real this time

---
 pandas/io/packers.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 04013134cc90c..d4e83fb889d3a 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -116,14 +116,9 @@ def writer(fh):
     path_or_buf = _stringify_path(path_or_buf)
     if isinstance(path_or_buf, str):
         try:
-            exists = os.path.exists(os.path.dirname(path_or_buf))
-        except (TypeError, ValueError):
-            exists = False
-
-        if exists:
             with open(path_or_buf, mode) as fh:
                 writer(fh)
-        else:
+        except (FileNotFoundError, ValueError):
             raise ValueError('path_or_buf is invalid or was not found')
 
     elif path_or_buf is None:

From 06f4b18eb209bb2a1201a05b6001beb35dc7e20a Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Wed, 3 Jul 2019 11:46:35 -0400
Subject: [PATCH 03/15] added test

---
 pandas/tests/io/test_packers.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 203b550b8936a..5e14db5fa9abc 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -159,6 +159,9 @@ def __init__(self):
             read_msgpack(path_or_buf={})
         with pytest.raises(ValueError, match=msg.format(r'.*\.A')):
             read_msgpack(path_or_buf=A())
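+        # GH 27160 (issue number taken from the commit subject):
+        # to_msgpack should raise a clear error when given a path inside
+        # a directory that does not exist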
+        with pytest.raises(ValueError):
+            df = tm.makeDataFrame()
+            df.to_msgpack('/non/existent/path/df.msgpack')
 
 
 @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")

From df9acecb975b93ada1c5b60aab7d15e7b5bea537 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Wed, 3 Jul 2019 13:35:05 -0400
Subject: [PATCH 04/15] updated nonexistent path

---
 pandas/tests/io/test_packers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 5e14db5fa9abc..1a829997f8485 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -161,7 +161,7 @@ def __init__(self):
             read_msgpack(path_or_buf=A())
         with pytest.raises(ValueError):
             df = tm.makeDataFrame()
-            df.to_msgpack('/non/existent/path/df.msgpack')
+            df.to_msgpack(os.path.join('nonexistent_dir', 'df.msgpack'))
 
 
 @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")

From 330e5434c3825dedb2091123f44abfd5674c916d Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Wed, 3 Jul 2019 18:33:55 -0400
Subject: [PATCH 05/15] removed unnecessary error exception

---
 pandas/io/packers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index d4e83fb889d3a..e9925acfc8e1d 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -118,7 +118,7 @@ def writer(fh):
         try:
             with open(path_or_buf, mode) as fh:
                 writer(fh)
-        except (FileNotFoundError, ValueError):
+        except FileNotFoundError:
             raise ValueError('path_or_buf is invalid or was not found')
 
     elif path_or_buf is None:

From 3dac473d88a09b7785696155e635946ebe9bf35b Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Thu, 4 Jul 2019 14:31:52 -0400
Subject: [PATCH 06/15] raise FileNotFoundError instead of ValueError

---
 pandas/io/packers.py            | 2 +-
 pandas/tests/io/test_packers.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index e9925acfc8e1d..8f7d16e904553 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -119,7 +119,7 @@ def writer(fh):
             with open(path_or_buf, mode) as fh:
                 writer(fh)
         except FileNotFoundError:
-            raise ValueError('path_or_buf is invalid or was not found')
+            raise FileNotFoundError('path_or_buf is invalid or was not found')
 
     elif path_or_buf is None:

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 1a829997f8485..ee53f50640d93 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -159,7 +159,7 @@ def __init__(self):
             read_msgpack(path_or_buf={})
         with pytest.raises(ValueError, match=msg.format(r'.*\.A')):
             read_msgpack(path_or_buf=A())
-        with pytest.raises(ValueError):
+        with pytest.raises(FileNotFoundError):
             df = tm.makeDataFrame()
             df.to_msgpack(os.path.join('nonexistent_dir', 'df.msgpack'))

From 4491dff101ec2c37ca1e4e916d0918df5f911bde Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Thu, 4 Jul 2019 15:46:59 -0400
Subject: [PATCH 07/15] fixed linting issue (hopefully)

---
 pandas/io/packers.py            | 6 +++---
 pandas/tests/io/test_packers.py | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 8f7d16e904553..fe985aba254d3 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -118,9 +118,9 @@ def writer(fh):
         try:
             with open(path_or_buf, mode) as fh:
                 writer(fh)
-        except FileNotFoundError:
-            raise FileNotFoundError('path_or_buf is invalid or was not found')
-
+        except FileNotFoundError as error:
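+            # reword the OS-level strerror for clarity, but keep the
+            # original exception type and traceback when re-raising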
+            error.strerror = 'path_or_buf is invalid or was not found'
+            raise error
     elif path_or_buf is None:
         buf = BytesIO()
         writer(buf)

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index ee53f50640d93..095169ce0ebe2 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -161,7 +161,8 @@ def __init__(self):
             read_msgpack(path_or_buf=A())
         with pytest.raises(FileNotFoundError):
             df = tm.makeDataFrame()
-            df.to_msgpack(os.path.join('nonexistent_dir', 'df.msgpack'))
+            invalid_path = os.path.join('nonexistent_dir', 'df.msgpack')
+            df.to_msgpack(path_or_buf=invalid_path)
 
 
 @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")

From c7f62a7cdae2cd5b0cafe33c767a11f02e387474 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Thu, 4 Jul 2019 22:00:06 -0400
Subject: [PATCH 08/15] ran black -l 79 against files

---
 pandas/io/packers.py            | 755 ++++++++++++++++++--------------
 pandas/tests/io/test_packers.py | 533 ++++++++++++----------
 2 files changed, 727 insertions(+), 561 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index fe985aba254d3..1b3d63e7c49d5 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -49,16 +49,38 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import PerformanceWarning
 from pandas.util._move import (
-    BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer)
+    BadMove as _BadMove,
+    move_into_mutable_buffer as _move_into_mutable_buffer,
+)
 
 from pandas.core.dtypes.common import (
-    is_categorical_dtype, is_datetime64tz_dtype, is_object_dtype,
-    needs_i8_conversion, pandas_dtype)
+    is_categorical_dtype,
+    is_datetime64tz_dtype,
+    is_object_dtype,
+    needs_i8_conversion,
+    pandas_dtype,
+)
 
 from pandas import (  # noqa:F401
-    Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index,
-    Index, Int64Index, Interval, IntervalIndex, MultiIndex, NaT, Panel, Period,
-    PeriodIndex, RangeIndex, Series, TimedeltaIndex, Timestamp)
+    Categorical,
+    CategoricalIndex,
+    DataFrame,
+    DatetimeIndex,
+    Float64Index,
+    Index,
+    Int64Index,
+    Interval,
+    IntervalIndex,
+    MultiIndex,
+    NaT,
+    Panel,
+    Period,
+    PeriodIndex,
+    RangeIndex,
+    Series,
+    TimedeltaIndex,
+    Timestamp,
+)
 from pandas.core import internals
 from pandas.core.arrays import DatetimeArray, IntervalArray, PeriodArray
 from pandas.core.arrays.sparse import BlockIndex, IntIndex
@@ -95,19 +117,22 @@ def to_msgpack(path_or_buf, *args, **kwargs):
     compress : type of compressor (zlib or blosc), default to None (no
         compression)
     """
-    warnings.warn("to_msgpack is deprecated and will be removed in a "
-                  "future version.\n"
-                  "It is recommended to use pyarrow for on-the-wire "
-                  "transmission of pandas objects.",
-                  FutureWarning, stacklevel=3)
+    warnings.warn(
+        "to_msgpack is deprecated and will be removed in a "
+        "future version.\n"
+        "It is recommended to use pyarrow for on-the-wire "
+        "transmission of pandas objects.",
+        FutureWarning,
+        stacklevel=3,
+    )
 
     global compressor
-    compressor = kwargs.pop('compress', None)
-    append = kwargs.pop('append', None)
+    compressor = kwargs.pop("compress", None)
+    append = kwargs.pop("append", None)
     if append:
-        mode = 'a+b'
+        mode = "a+b"
     else:
-        mode = 'wb'
+        mode = "wb"
 
     def writer(fh):
         for a in args:
@@ -119,7 +144,7 @@ def writer(fh):
         try:
             with open(path_or_buf, mode) as fh:
                 writer(fh)
         except FileNotFoundError as error:
-            error.strerror = 'path_or_buf is invalid or was not found'
+            error.strerror = "path_or_buf is invalid or was not found"
             raise error
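+    # with path_or_buf=None, execution falls through to the branch below,
+    # which writes to an in-memory BytesIO buffer and returns the raw bytes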
elif path_or_buf is None: buf = BytesIO() @@ -129,7 +154,7 @@ def writer(fh): writer(path_or_buf) -def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): +def read_msgpack(path_or_buf, encoding="utf-8", iterator=False, **kwargs): """ Load msgpack pandas object from the specified file path @@ -156,11 +181,14 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): read_msgpack is only guaranteed to be backwards compatible to pandas 0.20.3. """ - warnings.warn("The read_msgpack is deprecated and will be removed in a " - "future version.\n" - "It is recommended to use pyarrow for on-the-wire " - "transmission of pandas objects.", - FutureWarning, stacklevel=3) + warnings.warn( + "The read_msgpack is deprecated and will be removed in a " + "future version.\n" + "It is recommended to use pyarrow for on-the-wire " + "transmission of pandas objects.", + FutureWarning, + stacklevel=3, + ) path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf) if iterator: @@ -186,7 +214,7 @@ def read(fh): exists = False if exists: - with open(path_or_buf, 'rb') as fh: + with open(path_or_buf, "rb") as fh: return read(fh) if isinstance(path_or_buf, bytes): @@ -198,25 +226,25 @@ def read(fh): finally: if fh is not None: fh.close() - elif hasattr(path_or_buf, 'read') and callable(path_or_buf.read): + elif hasattr(path_or_buf, "read") and callable(path_or_buf.read): # treat as a buffer like return read(path_or_buf) - raise ValueError('path_or_buf needs to be a string file path or file-like') + raise ValueError("path_or_buf needs to be a string file path or file-like") -dtype_dict = {21: np.dtype('M8[ns]'), - 'datetime64[ns]': np.dtype('M8[ns]'), - 'datetime64[us]': np.dtype('M8[us]'), - 22: np.dtype('m8[ns]'), - 'timedelta64[ns]': np.dtype('m8[ns]'), - 'timedelta64[us]': np.dtype('m8[us]'), - - # this is platform int, which we need to remap to np.int64 - # for compat on windows platforms - 7: np.dtype('int64'), - 'category': 'category' - } +dtype_dict = { + 21: np.dtype("M8[ns]"), + "datetime64[ns]": np.dtype("M8[ns]"), + "datetime64[us]": np.dtype("M8[us]"), + 22: np.dtype("m8[ns]"), + "timedelta64[ns]": np.dtype("m8[ns]"), + "timedelta64[us]": np.dtype("m8[us]"), + # this is platform int, which we need to remap to np.int64 + # for compat on windows platforms + 7: np.dtype("int64"), + "category": "category", +} def dtype_for(t): @@ -226,13 +254,15 @@ def dtype_for(t): return np.typeDict.get(t, t) -c2f_dict = {'complex': np.float64, - 'complex128': np.float64, - 'complex64': np.float32} +c2f_dict = { + "complex": np.float64, + "complex128": np.float64, + "complex64": np.float32, +} # windows (32 bit) compat -if hasattr(np, 'float128'): - c2f_dict['complex256'] = np.float128 +if hasattr(np, "float128"): + c2f_dict["complex256"] = np.float128 def c2f(r, i, ctype_name): @@ -256,13 +286,12 @@ def convert(values): return values.ravel().tolist() if needs_i8_conversion(dtype): - values = values.view('i8') + values = values.view("i8") v = values.ravel() - if compressor == 'zlib': + if compressor == "zlib": zlib = import_optional_dependency( - "zlib", - extra="zlib is required when `compress='zlib'`." + "zlib", extra="zlib is required when `compress='zlib'`." ) # return string arrays like they are @@ -273,10 +302,9 @@ def convert(values): v = v.tostring() return ExtType(0, zlib.compress(v)) - elif compressor == 'blosc': + elif compressor == "blosc": blosc = import_optional_dependency( - "blosc", - extra="zlib is required when `compress='blosc'`." 
+ "blosc", extra="zlib is required when `compress='blosc'`." ) # return string arrays like they are @@ -307,19 +335,17 @@ def unconvert(values, dtype, compress=None): dtype = pandas_dtype(dtype).base if not as_is_ext: - values = values.encode('latin1') + values = values.encode("latin1") if compress: - if compress == 'zlib': + if compress == "zlib": zlib = import_optional_dependency( - "zlib", - extra="zlib is required when `compress='zlib'`." + "zlib", extra="zlib is required when `compress='zlib'`." ) decompress = zlib.decompress - elif compress == 'blosc': + elif compress == "blosc": blosc = import_optional_dependency( - "blosc", - extra="zlib is required when `compress='blosc'`." + "blosc", extra="zlib is required when `compress='blosc'`." ) decompress = blosc.decompress else: @@ -327,8 +353,7 @@ def unconvert(values, dtype, compress=None): try: return np.frombuffer( - _move_into_mutable_buffer(decompress(values)), - dtype=dtype, + _move_into_mutable_buffer(decompress(values)), dtype=dtype ) except _BadMove as e: # Pull the decompressed data off of the `_BadMove` exception. @@ -342,8 +367,8 @@ def unconvert(values, dtype, compress=None): # warn even though we need to make a copy because we are only # copying at most 1 byte. warnings.warn( - 'copying data after decompressing; this may mean that' - ' decompress is caching its result', + "copying data after decompressing; this may mean that" + " decompress is caching its result", PerformanceWarning, ) # fall through to copying `np.fromstring` @@ -362,75 +387,88 @@ def encode(obj): tobj = type(obj) if isinstance(obj, Index): if isinstance(obj, RangeIndex): - return {'typ': 'range_index', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'start': obj._range.start, - 'stop': obj._range.stop, - 'step': obj._range.step, - } + return { + "typ": "range_index", + "klass": obj.__class__.__name__, + "name": getattr(obj, "name", None), + "start": obj._range.start, + "stop": obj._range.stop, + "step": obj._range.step, + } elif isinstance(obj, PeriodIndex): - return {'typ': 'period_index', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'freq': getattr(obj, 'freqstr', None), - 'dtype': obj.dtype.name, - 'data': convert(obj.asi8), - 'compress': compressor} + return { + "typ": "period_index", + "klass": obj.__class__.__name__, + "name": getattr(obj, "name", None), + "freq": getattr(obj, "freqstr", None), + "dtype": obj.dtype.name, + "data": convert(obj.asi8), + "compress": compressor, + } elif isinstance(obj, DatetimeIndex): - tz = getattr(obj, 'tz', None) + tz = getattr(obj, "tz", None) # store tz info and data as UTC if tz is not None: tz = tz.zone - obj = obj.tz_convert('UTC') - return {'typ': 'datetime_index', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'dtype': obj.dtype.name, - 'data': convert(obj.asi8), - 'freq': getattr(obj, 'freqstr', None), - 'tz': tz, - 'compress': compressor} + obj = obj.tz_convert("UTC") + return { + "typ": "datetime_index", + "klass": obj.__class__.__name__, + "name": getattr(obj, "name", None), + "dtype": obj.dtype.name, + "data": convert(obj.asi8), + "freq": getattr(obj, "freqstr", None), + "tz": tz, + "compress": compressor, + } elif isinstance(obj, (IntervalIndex, IntervalArray)): if isinstance(obj, IntervalIndex): - typ = 'interval_index' + typ = "interval_index" else: - typ = 'interval_array' - return {'typ': typ, - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'left': getattr(obj, 'left', None), - 'right': 
getattr(obj, 'right', None), - 'closed': getattr(obj, 'closed', None)} + typ = "interval_array" + return { + "typ": typ, + "klass": obj.__class__.__name__, + "name": getattr(obj, "name", None), + "left": getattr(obj, "left", None), + "right": getattr(obj, "right", None), + "closed": getattr(obj, "closed", None), + } elif isinstance(obj, MultiIndex): - return {'typ': 'multi_index', - 'klass': obj.__class__.__name__, - 'names': getattr(obj, 'names', None), - 'dtype': obj.dtype.name, - 'data': convert(obj.values), - 'compress': compressor} + return { + "typ": "multi_index", + "klass": obj.__class__.__name__, + "names": getattr(obj, "names", None), + "dtype": obj.dtype.name, + "data": convert(obj.values), + "compress": compressor, + } else: - return {'typ': 'index', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'dtype': obj.dtype.name, - 'data': convert(obj.values), - 'compress': compressor} + return { + "typ": "index", + "klass": obj.__class__.__name__, + "name": getattr(obj, "name", None), + "dtype": obj.dtype.name, + "data": convert(obj.values), + "compress": compressor, + } elif isinstance(obj, Categorical): - return {'typ': 'category', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'codes': obj.codes, - 'categories': obj.categories, - 'ordered': obj.ordered, - 'compress': compressor} + return { + "typ": "category", + "klass": obj.__class__.__name__, + "name": getattr(obj, "name", None), + "codes": obj.codes, + "categories": obj.categories, + "ordered": obj.ordered, + "compress": compressor, + } elif isinstance(obj, Series): if isinstance(obj, SparseSeries): raise NotImplementedError( - 'msgpack sparse series is not implemented' + "msgpack sparse series is not implemented" ) # d = {'typ': 'sparse_series', # 'klass': obj.__class__.__name__, @@ -443,17 +481,19 @@ def encode(obj): # d[f] = getattr(obj, f, None) # return d else: - return {'typ': 'series', - 'klass': obj.__class__.__name__, - 'name': getattr(obj, 'name', None), - 'index': obj.index, - 'dtype': obj.dtype.name, - 'data': convert(obj.values), - 'compress': compressor} + return { + "typ": "series", + "klass": obj.__class__.__name__, + "name": getattr(obj, "name", None), + "index": obj.index, + "dtype": obj.dtype.name, + "data": convert(obj.values), + "compress": compressor, + } elif issubclass(tobj, NDFrame): if isinstance(obj, SparseDataFrame): raise NotImplementedError( - 'msgpack sparse frame is not implemented' + "msgpack sparse frame is not implemented" ) # d = {'typ': 'sparse_dataframe', # 'klass': obj.__class__.__name__, @@ -470,19 +510,29 @@ def encode(obj): data = data.consolidate() # the block manager - return {'typ': 'block_manager', - 'klass': obj.__class__.__name__, - 'axes': data.axes, - 'blocks': [{'locs': b.mgr_locs.as_array, - 'values': convert(b.values), - 'shape': b.values.shape, - 'dtype': b.dtype.name, - 'klass': b.__class__.__name__, - 'compress': compressor} for b in data.blocks] + return { + "typ": "block_manager", + "klass": obj.__class__.__name__, + "axes": data.axes, + "blocks": [ + { + "locs": b.mgr_locs.as_array, + "values": convert(b.values), + "shape": b.values.shape, + "dtype": b.dtype.name, + "klass": b.__class__.__name__, + "compress": compressor, } + for b in data.blocks + ], + } - elif isinstance(obj, (datetime, date, np.datetime64, timedelta, - np.timedelta64)) or obj is NaT: + elif ( + isinstance( + obj, (datetime, date, np.datetime64, timedelta, np.timedelta64) + ) + or obj is NaT + ): if isinstance(obj, Timestamp): tz = obj.tzinfo if tz 
is not None: @@ -490,71 +540,84 @@ def encode(obj): freq = obj.freq if freq is not None: freq = freq.freqstr - return {'typ': 'timestamp', - 'value': obj.value, - 'freq': freq, - 'tz': tz} + return { + "typ": "timestamp", + "value": obj.value, + "freq": freq, + "tz": tz, + } if obj is NaT: - return {'typ': 'nat'} + return {"typ": "nat"} elif isinstance(obj, np.timedelta64): - return {'typ': 'timedelta64', - 'data': obj.view('i8')} + return {"typ": "timedelta64", "data": obj.view("i8")} elif isinstance(obj, timedelta): - return {'typ': 'timedelta', - 'data': (obj.days, obj.seconds, obj.microseconds)} + return { + "typ": "timedelta", + "data": (obj.days, obj.seconds, obj.microseconds), + } elif isinstance(obj, np.datetime64): - return {'typ': 'datetime64', - 'data': str(obj)} + return {"typ": "datetime64", "data": str(obj)} elif isinstance(obj, datetime): - return {'typ': 'datetime', - 'data': obj.isoformat()} + return {"typ": "datetime", "data": obj.isoformat()} elif isinstance(obj, date): - return {'typ': 'date', - 'data': obj.isoformat()} + return {"typ": "date", "data": obj.isoformat()} raise Exception( - "cannot encode this datetimelike object: {obj}".format(obj=obj)) + "cannot encode this datetimelike object: {obj}".format(obj=obj) + ) elif isinstance(obj, Period): - return {'typ': 'period', - 'ordinal': obj.ordinal, - 'freq': obj.freqstr} + return {"typ": "period", "ordinal": obj.ordinal, "freq": obj.freqstr} elif isinstance(obj, Interval): - return {'typ': 'interval', - 'left': obj.left, - 'right': obj.right, - 'closed': obj.closed} + return { + "typ": "interval", + "left": obj.left, + "right": obj.right, + "closed": obj.closed, + } elif isinstance(obj, BlockIndex): - return {'typ': 'block_index', - 'klass': obj.__class__.__name__, - 'blocs': obj.blocs, - 'blengths': obj.blengths, - 'length': obj.length} + return { + "typ": "block_index", + "klass": obj.__class__.__name__, + "blocs": obj.blocs, + "blengths": obj.blengths, + "length": obj.length, + } elif isinstance(obj, IntIndex): - return {'typ': 'int_index', - 'klass': obj.__class__.__name__, - 'indices': obj.indices, - 'length': obj.length} + return { + "typ": "int_index", + "klass": obj.__class__.__name__, + "indices": obj.indices, + "length": obj.length, + } elif isinstance(obj, np.ndarray): - return {'typ': 'ndarray', - 'shape': obj.shape, - 'ndim': obj.ndim, - 'dtype': obj.dtype.name, - 'data': convert(obj), - 'compress': compressor} + return { + "typ": "ndarray", + "shape": obj.shape, + "ndim": obj.ndim, + "dtype": obj.dtype.name, + "data": convert(obj), + "compress": compressor, + } elif isinstance(obj, np.number): if np.iscomplexobj(obj): - return {'typ': 'np_scalar', - 'sub_typ': 'np_complex', - 'dtype': obj.dtype.name, - 'real': np.real(obj).__repr__(), - 'imag': np.imag(obj).__repr__()} + return { + "typ": "np_scalar", + "sub_typ": "np_complex", + "dtype": obj.dtype.name, + "real": np.real(obj).__repr__(), + "imag": np.imag(obj).__repr__(), + } else: - return {'typ': 'np_scalar', - 'dtype': obj.dtype.name, - 'data': obj.__repr__()} + return { + "typ": "np_scalar", + "dtype": obj.dtype.name, + "data": obj.__repr__(), + } elif isinstance(obj, complex): - return {'typ': 'np_complex', - 'real': np.real(obj).__repr__(), - 'imag': np.imag(obj).__repr__()} + return { + "typ": "np_complex", + "real": np.real(obj).__repr__(), + "imag": np.imag(obj).__repr__(), + } return obj @@ -564,110 +627,113 @@ def decode(obj): Decoder for deserializing numpy data types. 
""" - typ = obj.get('typ') + typ = obj.get("typ") if typ is None: return obj - elif typ == 'timestamp': - freq = obj['freq'] if 'freq' in obj else obj['offset'] - return Timestamp(obj['value'], tz=obj['tz'], freq=freq) - elif typ == 'nat': + elif typ == "timestamp": + freq = obj["freq"] if "freq" in obj else obj["offset"] + return Timestamp(obj["value"], tz=obj["tz"], freq=freq) + elif typ == "nat": return NaT - elif typ == 'period': - return Period(ordinal=obj['ordinal'], freq=obj['freq']) - elif typ == 'index': - dtype = dtype_for(obj['dtype']) - data = unconvert(obj['data'], dtype, - obj.get('compress')) - return Index(data, dtype=dtype, name=obj['name']) - elif typ == 'range_index': - return RangeIndex(obj['start'], - obj['stop'], - obj['step'], - name=obj['name']) - elif typ == 'multi_index': - dtype = dtype_for(obj['dtype']) - data = unconvert(obj['data'], dtype, - obj.get('compress')) + elif typ == "period": + return Period(ordinal=obj["ordinal"], freq=obj["freq"]) + elif typ == "index": + dtype = dtype_for(obj["dtype"]) + data = unconvert(obj["data"], dtype, obj.get("compress")) + return Index(data, dtype=dtype, name=obj["name"]) + elif typ == "range_index": + return RangeIndex( + obj["start"], obj["stop"], obj["step"], name=obj["name"] + ) + elif typ == "multi_index": + dtype = dtype_for(obj["dtype"]) + data = unconvert(obj["data"], dtype, obj.get("compress")) data = [tuple(x) for x in data] - return MultiIndex.from_tuples(data, names=obj['names']) - elif typ == 'period_index': - data = unconvert(obj['data'], np.int64, obj.get('compress')) - d = dict(name=obj['name'], freq=obj['freq']) - freq = d.pop('freq', None) + return MultiIndex.from_tuples(data, names=obj["names"]) + elif typ == "period_index": + data = unconvert(obj["data"], np.int64, obj.get("compress")) + d = dict(name=obj["name"], freq=obj["freq"]) + freq = d.pop("freq", None) return PeriodIndex(PeriodArray(data, freq), **d) - elif typ == 'datetime_index': - data = unconvert(obj['data'], np.int64, obj.get('compress')) - d = dict(name=obj['name'], freq=obj['freq']) + elif typ == "datetime_index": + data = unconvert(obj["data"], np.int64, obj.get("compress")) + d = dict(name=obj["name"], freq=obj["freq"]) result = DatetimeIndex(data, **d) - tz = obj['tz'] + tz = obj["tz"] # reverse tz conversion if tz is not None: - result = result.tz_localize('UTC').tz_convert(tz) + result = result.tz_localize("UTC").tz_convert(tz) return result - elif typ in ('interval_index', 'interval_array'): - return globals()[obj['klass']].from_arrays(obj['left'], - obj['right'], - obj['closed'], - name=obj['name']) - elif typ == 'category': - from_codes = globals()[obj['klass']].from_codes - return from_codes(codes=obj['codes'], - categories=obj['categories'], - ordered=obj['ordered']) - - elif typ == 'interval': - return Interval(obj['left'], obj['right'], obj['closed']) - elif typ == 'series': - dtype = dtype_for(obj['dtype']) + elif typ in ("interval_index", "interval_array"): + return globals()[obj["klass"]].from_arrays( + obj["left"], obj["right"], obj["closed"], name=obj["name"] + ) + elif typ == "category": + from_codes = globals()[obj["klass"]].from_codes + return from_codes( + codes=obj["codes"], + categories=obj["categories"], + ordered=obj["ordered"], + ) + + elif typ == "interval": + return Interval(obj["left"], obj["right"], obj["closed"]) + elif typ == "series": + dtype = dtype_for(obj["dtype"]) pd_dtype = pandas_dtype(dtype) - index = obj['index'] - result = Series(unconvert(obj['data'], dtype, obj['compress']), - index=index, - 
dtype=pd_dtype, - name=obj['name']) + index = obj["index"] + result = Series( + unconvert(obj["data"], dtype, obj["compress"]), + index=index, + dtype=pd_dtype, + name=obj["name"], + ) return result - elif typ == 'block_manager': - axes = obj['axes'] + elif typ == "block_manager": + axes = obj["axes"] def create_block(b): - values = _safe_reshape(unconvert( - b['values'], dtype_for(b['dtype']), - b['compress']), b['shape']) + values = _safe_reshape( + unconvert(b["values"], dtype_for(b["dtype"]), b["compress"]), + b["shape"], + ) # locs handles duplicate column names, and should be used instead # of items; see GH 9618 - if 'locs' in b: - placement = b['locs'] + if "locs" in b: + placement = b["locs"] else: - placement = axes[0].get_indexer(b['items']) + placement = axes[0].get_indexer(b["items"]) - if is_datetime64tz_dtype(b['dtype']): + if is_datetime64tz_dtype(b["dtype"]): assert isinstance(values, np.ndarray), type(values) - assert values.dtype == 'M8[ns]', values.dtype - values = DatetimeArray(values, dtype=b['dtype']) - - return make_block(values=values, - klass=getattr(internals, b['klass']), - placement=placement, - dtype=b['dtype']) - - blocks = [create_block(b) for b in obj['blocks']] - return globals()[obj['klass']](BlockManager(blocks, axes)) - elif typ == 'datetime': - return parse(obj['data']) - elif typ == 'datetime64': - return np.datetime64(parse(obj['data'])) - elif typ == 'date': - return parse(obj['data']).date() - elif typ == 'timedelta': - return timedelta(*obj['data']) - elif typ == 'timedelta64': - return np.timedelta64(int(obj['data'])) + assert values.dtype == "M8[ns]", values.dtype + values = DatetimeArray(values, dtype=b["dtype"]) + + return make_block( + values=values, + klass=getattr(internals, b["klass"]), + placement=placement, + dtype=b["dtype"], + ) + + blocks = [create_block(b) for b in obj["blocks"]] + return globals()[obj["klass"]](BlockManager(blocks, axes)) + elif typ == "datetime": + return parse(obj["data"]) + elif typ == "datetime64": + return np.datetime64(parse(obj["data"])) + elif typ == "date": + return parse(obj["data"]).date() + elif typ == "timedelta": + return timedelta(*obj["data"]) + elif typ == "timedelta64": + return np.timedelta64(int(obj["data"])) # elif typ == 'sparse_series': # dtype = dtype_for(obj['dtype']) # return SparseSeries( @@ -685,94 +751,131 @@ def create_block(b): # obj['data'], items=obj['items'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind']) - elif typ == 'block_index': - return globals()[obj['klass']](obj['length'], obj['blocs'], - obj['blengths']) - elif typ == 'int_index': - return globals()[obj['klass']](obj['length'], obj['indices']) - elif typ == 'ndarray': - return unconvert(obj['data'], np.typeDict[obj['dtype']], - obj.get('compress')).reshape(obj['shape']) - elif typ == 'np_scalar': - if obj.get('sub_typ') == 'np_complex': - return c2f(obj['real'], obj['imag'], obj['dtype']) + elif typ == "block_index": + return globals()[obj["klass"]]( + obj["length"], obj["blocs"], obj["blengths"] + ) + elif typ == "int_index": + return globals()[obj["klass"]](obj["length"], obj["indices"]) + elif typ == "ndarray": + return unconvert( + obj["data"], np.typeDict[obj["dtype"]], obj.get("compress") + ).reshape(obj["shape"]) + elif typ == "np_scalar": + if obj.get("sub_typ") == "np_complex": + return c2f(obj["real"], obj["imag"], obj["dtype"]) else: - dtype = dtype_for(obj['dtype']) + dtype = dtype_for(obj["dtype"]) try: - return dtype(obj['data']) + return dtype(obj["data"]) except 
(ValueError, TypeError): - return dtype.type(obj['data']) - elif typ == 'np_complex': - return complex(obj['real'] + '+' + obj['imag'] + 'j') + return dtype.type(obj["data"]) + elif typ == "np_complex": + return complex(obj["real"] + "+" + obj["imag"] + "j") elif isinstance(obj, (dict, list, set)): return obj else: return obj -def pack(o, default=encode, - encoding='utf-8', unicode_errors='strict', use_single_float=False, - autoreset=1, use_bin_type=1): +def pack( + o, + default=encode, + encoding="utf-8", + unicode_errors="strict", + use_single_float=False, + autoreset=1, + use_bin_type=1, +): """ Pack an object and return the packed bytes. """ - return Packer(default=default, encoding=encoding, - unicode_errors=unicode_errors, - use_single_float=use_single_float, - autoreset=autoreset, - use_bin_type=use_bin_type).pack(o) - - -def unpack(packed, object_hook=decode, - list_hook=None, use_list=False, encoding='utf-8', - unicode_errors='strict', object_pairs_hook=None, - max_buffer_size=0, ext_hook=ExtType): + return Packer( + default=default, + encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float, + autoreset=autoreset, + use_bin_type=use_bin_type, + ).pack(o) + + +def unpack( + packed, + object_hook=decode, + list_hook=None, + use_list=False, + encoding="utf-8", + unicode_errors="strict", + object_pairs_hook=None, + max_buffer_size=0, + ext_hook=ExtType, +): """ Unpack a packed object, return an iterator Note: packed lists will be returned as tuples """ - return Unpacker(packed, object_hook=object_hook, - list_hook=list_hook, - use_list=use_list, encoding=encoding, - unicode_errors=unicode_errors, - object_pairs_hook=object_pairs_hook, - max_buffer_size=max_buffer_size, - ext_hook=ext_hook) + return Unpacker( + packed, + object_hook=object_hook, + list_hook=list_hook, + use_list=use_list, + encoding=encoding, + unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook, + max_buffer_size=max_buffer_size, + ext_hook=ext_hook, + ) class Packer(_Packer): - - def __init__(self, default=encode, - encoding='utf-8', - unicode_errors='strict', - use_single_float=False, - autoreset=1, - use_bin_type=1): - super().__init__(default=default, encoding=encoding, - unicode_errors=unicode_errors, - use_single_float=use_single_float, - autoreset=autoreset, - use_bin_type=use_bin_type) + def __init__( + self, + default=encode, + encoding="utf-8", + unicode_errors="strict", + use_single_float=False, + autoreset=1, + use_bin_type=1, + ): + super().__init__( + default=default, + encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float, + autoreset=autoreset, + use_bin_type=use_bin_type, + ) class Unpacker(_Unpacker): - - def __init__(self, file_like=None, read_size=0, use_list=False, - object_hook=decode, - object_pairs_hook=None, list_hook=None, encoding='utf-8', - unicode_errors='strict', max_buffer_size=0, ext_hook=ExtType): - super().__init__(file_like=file_like, - read_size=read_size, - use_list=use_list, - object_hook=object_hook, - object_pairs_hook=object_pairs_hook, - list_hook=list_hook, - encoding=encoding, - unicode_errors=unicode_errors, - max_buffer_size=max_buffer_size, - ext_hook=ext_hook) + def __init__( + self, + file_like=None, + read_size=0, + use_list=False, + object_hook=decode, + object_pairs_hook=None, + list_hook=None, + encoding="utf-8", + unicode_errors="strict", + max_buffer_size=0, + ext_hook=ExtType, + ): + super().__init__( + file_like=file_like, + read_size=read_size, + use_list=use_list, + 
object_hook=object_hook, + object_pairs_hook=object_pairs_hook, + list_hook=list_hook, + encoding=encoding, + unicode_errors=unicode_errors, + max_buffer_size=max_buffer_size, + ext_hook=ext_hook, + ) class Iterator: @@ -798,13 +901,13 @@ def __iter__(self): path_exists = False if path_exists: - fh = open(self.path, 'rb') + fh = open(self.path, "rb") else: fh = BytesIO(self.path) else: - if not hasattr(self.path, 'read'): + if not hasattr(self.path, "read"): fh = BytesIO(self.path) else: diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 095169ce0ebe2..42add04b72126 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -12,12 +12,27 @@ import pandas from pandas import ( - Categorical, DataFrame, Index, Interval, MultiIndex, NaT, Period, Series, - Timestamp, bdate_range, date_range, period_range) + Categorical, + DataFrame, + Index, + Interval, + MultiIndex, + NaT, + Period, + Series, + Timestamp, + bdate_range, + date_range, + period_range, +) import pandas.util.testing as tm from pandas.util.testing import ( - assert_categorical_equal, assert_frame_equal, assert_index_equal, - assert_series_equal, ensure_clean) + assert_categorical_equal, + assert_frame_equal, + assert_index_equal, + assert_series_equal, + ensure_clean, +) from pandas.io.packers import read_msgpack, to_msgpack @@ -38,26 +53,28 @@ _ZLIB_INSTALLED = True -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def current_packers_data(): # our current version packers data from pandas.tests.io.generate_legacy_storage_files import ( - create_msgpack_data) + create_msgpack_data, + ) + return create_msgpack_data() -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def all_packers_data(): # our all of our current version packers data - from pandas.tests.io.generate_legacy_storage_files import ( - create_data) + from pandas.tests.io.generate_legacy_storage_files import create_data + return create_data() def check_arbitrary(a, b): if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): - assert(len(a) == len(b)) + assert len(a) == len(b) for a_, b_ in zip(a, b): check_arbitrary(a_, b_) elif isinstance(a, DataFrame): @@ -70,7 +87,7 @@ def check_arbitrary(a, b): # Temp, # Categorical.categories is changed from str to bytes in PY3 # maybe the same as GH 13591 - if b.categories.inferred_type == 'string': + if b.categories.inferred_type == "string": pass else: tm.assert_categorical_equal(a, b) @@ -80,14 +97,13 @@ def check_arbitrary(a, b): assert a == b assert a.freq == b.freq else: - assert(a == b) + assert a == b @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestPackers: - def setup_method(self, method): - self.path = '__%s__.msg' % tm.rands(10) + self.path = "__%s__.msg" % tm.rands(10) def teardown_method(self, method): pass @@ -100,7 +116,6 @@ def encode_decode(self, x, compress=None, **kwargs): @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestAPI(TestPackers): - def test_string_io(self): df = DataFrame(np.random.randn(10, 2)) @@ -123,7 +138,7 @@ def test_string_io(self): with ensure_clean(self.path) as p: s = df.to_msgpack() - with open(p, 'wb') as fh: + with open(p, "wb") as fh: fh.write(s) result = read_msgpack(p) tm.assert_frame_equal(result, df) @@ -148,26 +163,24 @@ def test_iterator_with_string_io(self): def test_invalid_arg(self): # GH10369 class A: - def __init__(self): self.read = 0 msg = "Invalid file path or buffer object type: " - with pytest.raises(ValueError, 
match=msg.format('NoneType')): + with pytest.raises(ValueError, match=msg.format("NoneType")): read_msgpack(path_or_buf=None) - with pytest.raises(ValueError, match=msg.format('dict')): + with pytest.raises(ValueError, match=msg.format("dict")): read_msgpack(path_or_buf={}) - with pytest.raises(ValueError, match=msg.format(r'.*\.A')): + with pytest.raises(ValueError, match=msg.format(r".*\.A")): read_msgpack(path_or_buf=A()) with pytest.raises(FileNotFoundError): df = tm.makeDataFrame() - invalid_path = os.path.join('nonexistent_dir', 'df.msgpack') + invalid_path = os.path.join("nonexistent_dir", "df.msgpack") df.to_msgpack(path_or_buf=invalid_path) @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestNumpy(TestPackers): - def test_numpy_scalar_float(self): x = np.float32(np.random.rand()) x_rec = self.encode_decode(x) @@ -207,12 +220,13 @@ def test_list_numpy_float(self): tm.assert_almost_equal(tuple(x), x_rec) def test_list_numpy_float_complex(self): - if not hasattr(np, 'complex128'): - pytest.skip('numpy can not handle complex128') + if not hasattr(np, "complex128"): + pytest.skip("numpy can not handle complex128") - x = [np.float32(np.random.rand()) for i in range(5)] + \ - [np.complex128(np.random.rand() + 1j * np.random.rand()) - for i in range(5)] + x = [np.float32(np.random.rand()) for i in range(5)] + [ + np.complex128(np.random.rand() + 1j * np.random.rand()) + for i in range(5) + ] x_rec = self.encode_decode(x) assert np.allclose(x, x_rec) @@ -226,18 +240,19 @@ def test_list_float(self): tm.assert_almost_equal(tuple(x), x_rec) def test_list_float_complex(self): - x = [np.random.rand() for i in range(5)] + \ - [(np.random.rand() + 1j * np.random.rand()) for i in range(5)] + x = [np.random.rand() for i in range(5)] + [ + (np.random.rand() + 1j * np.random.rand()) for i in range(5) + ] x_rec = self.encode_decode(x) assert np.allclose(x, x_rec) def test_dict_float(self): - x = {'foo': 1.0, 'bar': 2.0} + x = {"foo": 1.0, "bar": 2.0} x_rec = self.encode_decode(x) tm.assert_almost_equal(x, x_rec) def test_dict_complex(self): - x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j} + x = {"foo": 1.0 + 1.0j, "bar": 2.0 + 2.0j} x_rec = self.encode_decode(x) tm.assert_dict_equal(x, x_rec) @@ -245,13 +260,15 @@ def test_dict_complex(self): tm.assert_class_equal(x[key], x_rec[key], obj="complex value") def test_dict_numpy_float(self): - x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)} + x = {"foo": np.float32(1.0), "bar": np.float32(2.0)} x_rec = self.encode_decode(x) tm.assert_almost_equal(x, x_rec) def test_dict_numpy_complex(self): - x = {'foo': np.complex128(1.0 + 1.0j), - 'bar': np.complex128(2.0 + 2.0j)} + x = { + "foo": np.complex128(1.0 + 1.0j), + "bar": np.complex128(2.0 + 2.0j), + } x_rec = self.encode_decode(x) tm.assert_dict_equal(x, x_rec) @@ -263,7 +280,7 @@ def test_numpy_array_float(self): # run multiple times for n in range(10): x = np.random.rand(10) - for dtype in ['float32', 'float64']: + for dtype in ["float32", "float64"]: x = x.astype(dtype) x_rec = self.encode_decode(x) tm.assert_almost_equal(x, x_rec) @@ -271,11 +288,12 @@ def test_numpy_array_float(self): def test_numpy_array_complex(self): x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128) x_rec = self.encode_decode(x) - assert (all(map(lambda x, y: x == y, x, x_rec)) and - x.dtype == x_rec.dtype) + assert ( + all(map(lambda x, y: x == y, x, x_rec)) and x.dtype == x_rec.dtype + ) def test_list_mixed(self): - x = [1.0, np.float32(3.5), np.complex128(4.25), 'foo', np.bool_(1)] + x = 
[1.0, np.float32(3.5), np.complex128(4.25), "foo", np.bool_(1)] x_rec = self.encode_decode(x) # current msgpack cannot distinguish list/tuple tm.assert_almost_equal(tuple(x), x_rec) @@ -286,12 +304,13 @@ def test_list_mixed(self): @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestBasic(TestPackers): - def test_timestamp(self): - for i in [Timestamp( - '20130101'), Timestamp('20130101', tz='US/Eastern'), - Timestamp('201301010501')]: + for i in [ + Timestamp("20130101"), + Timestamp("20130101", tz="US/Eastern"), + Timestamp("201301010501"), + ]: i_rec = self.encode_decode(i) assert i == i_rec @@ -301,62 +320,74 @@ def test_nat(self): def test_datetimes(self): - for i in [datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 1, 5, 1), - datetime.date(2013, 1, 1), - np.datetime64(datetime.datetime(2013, 1, 5, 2, 15))]: + for i in [ + datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 1, 5, 1), + datetime.date(2013, 1, 1), + np.datetime64(datetime.datetime(2013, 1, 5, 2, 15)), + ]: i_rec = self.encode_decode(i) assert i == i_rec def test_timedeltas(self): - for i in [datetime.timedelta(days=1), - datetime.timedelta(days=1, seconds=10), - np.timedelta64(1000000)]: + for i in [ + datetime.timedelta(days=1), + datetime.timedelta(days=1, seconds=10), + np.timedelta64(1000000), + ]: i_rec = self.encode_decode(i) assert i == i_rec def test_periods(self): # 13463 - for i in [Period('2010-09', 'M'), Period('2014-Q1', 'Q')]: + for i in [Period("2010-09", "M"), Period("2014-Q1", "Q")]: i_rec = self.encode_decode(i) assert i == i_rec def test_intervals(self): # 19967 - for i in [Interval(0, 1), Interval(0, 1, 'left'), - Interval(10, 25., 'right')]: + for i in [ + Interval(0, 1), + Interval(0, 1, "left"), + Interval(10, 25.0, "right"), + ]: i_rec = self.encode_decode(i) assert i == i_rec @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestIndex(TestPackers): - def setup_method(self, method): super().setup_method(method) self.d = { - 'string': tm.makeStringIndex(100), - 'date': tm.makeDateIndex(100), - 'int': tm.makeIntIndex(100), - 'rng': tm.makeRangeIndex(100), - 'float': tm.makeFloatIndex(100), - 'empty': Index([]), - 'tuple': Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])), - 'period': Index(period_range('2012-1-1', freq='M', periods=3)), - 'date2': Index(date_range('2013-01-1', periods=10)), - 'bdate': Index(bdate_range('2013-01-02', periods=10)), - 'cat': tm.makeCategoricalIndex(100), - 'interval': tm.makeIntervalIndex(100), - 'timedelta': tm.makeTimedeltaIndex(100, 'H') + "string": tm.makeStringIndex(100), + "date": tm.makeDateIndex(100), + "int": tm.makeIntIndex(100), + "rng": tm.makeRangeIndex(100), + "float": tm.makeFloatIndex(100), + "empty": Index([]), + "tuple": Index(zip(["foo", "bar", "baz"], [1, 2, 3])), + "period": Index(period_range("2012-1-1", freq="M", periods=3)), + "date2": Index(date_range("2013-01-1", periods=10)), + "bdate": Index(bdate_range("2013-01-02", periods=10)), + "cat": tm.makeCategoricalIndex(100), + "interval": tm.makeIntervalIndex(100), + "timedelta": tm.makeTimedeltaIndex(100, "H"), } self.mi = { - 'reg': MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), - ('foo', 'two'), - ('qux', 'one'), ('qux', 'two')], - names=['first', 'second']), + "reg": MultiIndex.from_tuples( + [ + ("bar", "one"), + ("baz", "two"), + ("foo", "two"), + ("qux", "one"), + ("qux", "two"), + ], + names=["first", "second"], + ) } def test_basic_index(self): @@ -366,13 +397,14 @@ def test_basic_index(self): tm.assert_index_equal(i, i_rec) # 
datetime with no freq (GH5506) - i = Index([Timestamp('20130101'), Timestamp('20130103')]) + i = Index([Timestamp("20130101"), Timestamp("20130103")]) i_rec = self.encode_decode(i) tm.assert_index_equal(i, i_rec) # datetime with timezone - i = Index([Timestamp('20130101 9:00:00'), Timestamp( - '20130103 11:00:00')]).tz_localize('US/Eastern') + i = Index( + [Timestamp("20130101 9:00:00"), Timestamp("20130103 11:00:00")] + ).tz_localize("US/Eastern") i_rec = self.encode_decode(i) tm.assert_index_equal(i, i_rec) @@ -391,52 +423,51 @@ def test_unicode(self): def categorical_index(self): # GH15487 df = DataFrame(np.random.randn(10, 2)) - df = df.astype({0: 'category'}).set_index(0) + df = df.astype({0: "category"}).set_index(0) result = self.encode_decode(df) tm.assert_frame_equal(result, df) @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestSeries(TestPackers): - def setup_method(self, method): super().setup_method(method) self.d = {} s = tm.makeStringSeries() - s.name = 'string' - self.d['string'] = s + s.name = "string" + self.d["string"] = s s = tm.makeObjectSeries() - s.name = 'object' - self.d['object'] = s + s.name = "object" + self.d["object"] = s - s = Series(iNaT, dtype='M8[ns]', index=range(5)) - self.d['date'] = s + s = Series(iNaT, dtype="M8[ns]", index=range(5)) + self.d["date"] = s data = { - 'A': [0., 1., 2., 3., np.nan], - 'B': [0, 1, 0, 1, 0], - 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], - 'D': date_range('1/1/2009', periods=5), - 'E': [0., 1, Timestamp('20100101'), 'foo', 2.], - 'F': [Timestamp('20130102', tz='US/Eastern')] * 2 + - [Timestamp('20130603', tz='CET')] * 3, - 'G': [Timestamp('20130102', tz='US/Eastern')] * 5, - 'H': Categorical([1, 2, 3, 4, 5]), - 'I': Categorical([1, 2, 3, 4, 5], ordered=True), - 'J': (np.bool_(1), 2, 3, 4, 5), + "A": [0.0, 1.0, 2.0, 3.0, np.nan], + "B": [0, 1, 0, 1, 0], + "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], + "D": date_range("1/1/2009", periods=5), + "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], + "F": [Timestamp("20130102", tz="US/Eastern")] * 2 + + [Timestamp("20130603", tz="CET")] * 3, + "G": [Timestamp("20130102", tz="US/Eastern")] * 5, + "H": Categorical([1, 2, 3, 4, 5]), + "I": Categorical([1, 2, 3, 4, 5], ordered=True), + "J": (np.bool_(1), 2, 3, 4, 5), } - self.d['float'] = Series(data['A']) - self.d['int'] = Series(data['B']) - self.d['mixed'] = Series(data['E']) - self.d['dt_tz_mixed'] = Series(data['F']) - self.d['dt_tz'] = Series(data['G']) - self.d['cat_ordered'] = Series(data['H']) - self.d['cat_unordered'] = Series(data['I']) - self.d['numpy_bool_mixed'] = Series(data['J']) + self.d["float"] = Series(data["A"]) + self.d["int"] = Series(data["B"]) + self.d["mixed"] = Series(data["E"]) + self.d["dt_tz_mixed"] = Series(data["F"]) + self.d["dt_tz"] = Series(data["G"]) + self.d["cat_ordered"] = Series(data["H"]) + self.d["cat_unordered"] = Series(data["I"]) + self.d["numpy_bool_mixed"] = Series(data["J"]) def test_basic(self): @@ -449,18 +480,18 @@ def test_basic(self): @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestCategorical(TestPackers): - def setup_method(self, method): super().setup_method(method) self.d = {} - self.d['plain_str'] = Categorical(['a', 'b', 'c', 'd', 'e']) - self.d['plain_str_ordered'] = Categorical(['a', 'b', 'c', 'd', 'e'], - ordered=True) + self.d["plain_str"] = Categorical(["a", "b", "c", "d", "e"]) + self.d["plain_str_ordered"] = Categorical( + ["a", "b", "c", "d", "e"], ordered=True + ) - self.d['plain_int'] = Categorical([5, 6, 7, 8]) - 
self.d['plain_int_ordered'] = Categorical([5, 6, 7, 8], ordered=True) + self.d["plain_int"] = Categorical([5, 6, 7, 8]) + self.d["plain_int_ordered"] = Categorical([5, 6, 7, 8], ordered=True) def test_basic(self): @@ -473,26 +504,26 @@ def test_basic(self): @pytest.mark.filterwarnings("ignore:msgpack:FutureWarning") class TestNDFrame(TestPackers): - def setup_method(self, method): super().setup_method(method) data = { - 'A': [0., 1., 2., 3., np.nan], - 'B': [0, 1, 0, 1, 0], - 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], - 'D': date_range('1/1/2009', periods=5), - 'E': [0., 1, Timestamp('20100101'), 'foo', 2.], - 'F': [Timestamp('20130102', tz='US/Eastern')] * 5, - 'G': [Timestamp('20130603', tz='CET')] * 5, - 'H': Categorical(['a', 'b', 'c', 'd', 'e']), - 'I': Categorical(['a', 'b', 'c', 'd', 'e'], ordered=True), + "A": [0.0, 1.0, 2.0, 3.0, np.nan], + "B": [0, 1, 0, 1, 0], + "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], + "D": date_range("1/1/2009", periods=5), + "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], + "F": [Timestamp("20130102", tz="US/Eastern")] * 5, + "G": [Timestamp("20130603", tz="CET")] * 5, + "H": Categorical(["a", "b", "c", "d", "e"]), + "I": Categorical(["a", "b", "c", "d", "e"], ordered=True), } self.frame = { - 'float': DataFrame(dict(A=data['A'], B=Series(data['A']) + 1)), - 'int': DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)), - 'mixed': DataFrame(data)} + "float": DataFrame(dict(A=data["A"], B=Series(data["A"]) + 1)), + "int": DataFrame(dict(A=data["B"], B=Series(data["B"]) + 1)), + "mixed": DataFrame(data), + } def test_basic_frame(self): @@ -506,22 +537,36 @@ def test_multi(self): for k in self.frame.keys(): assert_frame_equal(self.frame[k], i_rec[k]) - packed_items = tuple([self.frame['float'], self.frame['float'].A, - self.frame['float'].B, None]) + packed_items = tuple( + [ + self.frame["float"], + self.frame["float"].A, + self.frame["float"].B, + None, + ] + ) l_rec = self.encode_decode(packed_items) check_arbitrary(packed_items, l_rec) # this is an oddity in that packed lists will be returned as tuples - packed_items = [self.frame['float'], self.frame['float'].A, - self.frame['float'].B, None] + packed_items = [ + self.frame["float"], + self.frame["float"].A, + self.frame["float"].B, + None, + ] l_rec = self.encode_decode(packed_items) assert isinstance(l_rec, tuple) check_arbitrary(packed_items, l_rec) def test_iterator(self): - packed_items = [self.frame['float'], self.frame['float'].A, - self.frame['float'].B, None] + packed_items = [ + self.frame["float"], + self.frame["float"].A, + self.frame["float"].B, + None, + ] with ensure_clean(self.path) as path: to_msgpack(path, *packed_items) @@ -532,22 +577,22 @@ def tests_datetimeindex_freq_issue(self): # GH 5947 # inferring freq on the datetimeindex - df = DataFrame([1, 2, 3], index=date_range('1/1/2013', '1/3/2013')) + df = DataFrame([1, 2, 3], index=date_range("1/1/2013", "1/3/2013")) result = self.encode_decode(df) assert_frame_equal(result, df) - df = DataFrame([1, 2], index=date_range('1/1/2013', '1/2/2013')) + df = DataFrame([1, 2], index=date_range("1/1/2013", "1/2/2013")) result = self.encode_decode(df) assert_frame_equal(result, df) def test_dataframe_duplicate_column_names(self): # GH 9618 - expected_1 = DataFrame(columns=['a', 'a']) + expected_1 = DataFrame(columns=["a", "a"]) expected_2 = DataFrame(columns=[1] * 100) expected_2.loc[0] = np.random.randn(100) expected_3 = DataFrame(columns=[1, 1]) - expected_3.loc[0] = ['abc', np.nan] + expected_3.loc[0] = ["abc", np.nan] result_1 = 
self.encode_decode(expected_1) result_2 = self.encode_decode(expected_2) @@ -563,7 +608,6 @@ def test_dataframe_duplicate_column_names(self): @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning") @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestSparse(TestPackers): - def _check_roundtrip(self, obj, comparator, **kwargs): # currently these are not implemetned @@ -578,16 +622,19 @@ def test_sparse_series(self): s = tm.makeStringSeries() s[3:5] = np.nan ss = s.to_sparse() - self._check_roundtrip(ss, tm.assert_series_equal, - check_series_type=True) + self._check_roundtrip( + ss, tm.assert_series_equal, check_series_type=True + ) - ss2 = s.to_sparse(kind='integer') - self._check_roundtrip(ss2, tm.assert_series_equal, - check_series_type=True) + ss2 = s.to_sparse(kind="integer") + self._check_roundtrip( + ss2, tm.assert_series_equal, check_series_type=True + ) ss3 = s.to_sparse(fill_value=0) - self._check_roundtrip(ss3, tm.assert_series_equal, - check_series_type=True) + self._check_roundtrip( + ss3, tm.assert_series_equal, check_series_type=True + ) def test_sparse_frame(self): @@ -596,16 +643,17 @@ def test_sparse_frame(self): s.loc[8:10, -2] = np.nan ss = s.to_sparse() - self._check_roundtrip(ss, tm.assert_frame_equal, - check_frame_type=True) + self._check_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True) - ss2 = s.to_sparse(kind='integer') - self._check_roundtrip(ss2, tm.assert_frame_equal, - check_frame_type=True) + ss2 = s.to_sparse(kind="integer") + self._check_roundtrip( + ss2, tm.assert_frame_equal, check_frame_type=True + ) ss3 = s.to_sparse(fill_value=0) - self._check_roundtrip(ss3, tm.assert_frame_equal, - check_frame_type=True) + self._check_roundtrip( + ss3, tm.assert_frame_equal, check_frame_type=True + ) @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") @@ -616,6 +664,7 @@ class TestCompression(TestPackers): def setup_method(self, method): try: from sqlalchemy import create_engine + self._create_sql_engine = create_engine except ImportError: self._SQLALCHEMY_INSTALLED = False @@ -624,16 +673,16 @@ def setup_method(self, method): super().setup_method(method) data = { - 'A': np.arange(1000, dtype=np.float64), - 'B': np.arange(1000, dtype=np.int32), - 'C': list(100 * 'abcdefghij'), - 'D': date_range(datetime.datetime(2015, 4, 1), periods=1000), - 'E': [datetime.timedelta(days=x) for x in range(1000)], + "A": np.arange(1000, dtype=np.float64), + "B": np.arange(1000, dtype=np.int32), + "C": list(100 * "abcdefghij"), + "D": date_range(datetime.datetime(2015, 4, 1), periods=1000), + "E": [datetime.timedelta(days=x) for x in range(1000)], } self.frame = { - 'float': DataFrame({k: data[k] for k in ['A', 'A']}), - 'int': DataFrame({k: data[k] for k in ['B', 'B']}), - 'mixed': DataFrame(data), + "float": DataFrame({k: data[k] for k in ["A", "A"]}), + "int": DataFrame({k: data[k] for k in ["B", "B"]}), + "mixed": DataFrame(data), } def test_plain(self): @@ -653,16 +702,17 @@ def _test_compression(self, compress): def test_compression_zlib(self): if not _ZLIB_INSTALLED: - pytest.skip('no zlib') - self._test_compression('zlib') + pytest.skip("no zlib") + self._test_compression("zlib") def test_compression_blosc(self): if not _BLOSC_INSTALLED: - pytest.skip('no blosc') - self._test_compression('blosc') + pytest.skip("no blosc") + self._test_compression("blosc") def _test_compression_warns_when_decompress_caches( - self, monkeypatch, compress): + self, monkeypatch, compress + ): not_garbage = [] control = [] # copied data @@ 
-680,19 +730,20 @@ def decompress(ob): # types mapped to values to add in place. rhs = { - np.dtype('float64'): 1.0, - np.dtype('int32'): 1, - np.dtype('object'): 'a', - np.dtype('datetime64[ns]'): np.timedelta64(1, 'ns'), - np.dtype('timedelta64[ns]'): np.timedelta64(1, 'ns'), + np.dtype("float64"): 1.0, + np.dtype("int32"): 1, + np.dtype("object"): "a", + np.dtype("datetime64[ns]"): np.timedelta64(1, "ns"), + np.dtype("timedelta64[ns]"): np.timedelta64(1, "ns"), } - with monkeypatch.context() as m, \ - tm.assert_produces_warning(PerformanceWarning) as ws: - m.setattr(compress_module, 'decompress', decompress) + with monkeypatch.context() as m, tm.assert_produces_warning( + PerformanceWarning + ) as ws: + m.setattr(compress_module, "decompress", decompress) with catch_warnings(): - filterwarnings('ignore', category=FutureWarning) + filterwarnings("ignore", category=FutureWarning) i_rec = self.encode_decode(self.frame, compress=compress) for k in self.frame.keys(): @@ -708,9 +759,11 @@ def decompress(ob): for w in ws: # check the messages from our warnings - assert str(w.message) == ('copying data after decompressing; ' - 'this may mean that decompress is ' - 'caching its result') + assert str(w.message) == ( + "copying data after decompressing; " + "this may mean that decompress is " + "caching its result" + ) for buf, control_buf in zip(not_garbage, control): # make sure none of our mutations above affected the @@ -719,121 +772,121 @@ def decompress(ob): def test_compression_warns_when_decompress_caches_zlib(self, monkeypatch): if not _ZLIB_INSTALLED: - pytest.skip('no zlib') + pytest.skip("no zlib") self._test_compression_warns_when_decompress_caches( - monkeypatch, 'zlib') + monkeypatch, "zlib" + ) def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch): if not _BLOSC_INSTALLED: - pytest.skip('no blosc') + pytest.skip("no blosc") self._test_compression_warns_when_decompress_caches( - monkeypatch, 'blosc') + monkeypatch, "blosc" + ) def _test_small_strings_no_warn(self, compress): - empty = np.array([], dtype='uint8') + empty = np.array([], dtype="uint8") with tm.assert_produces_warning(None): with catch_warnings(): - filterwarnings('ignore', category=FutureWarning) + filterwarnings("ignore", category=FutureWarning) empty_unpacked = self.encode_decode(empty, compress=compress) tm.assert_numpy_array_equal(empty_unpacked, empty) assert empty_unpacked.flags.writeable - char = np.array([ord(b'a')], dtype='uint8') + char = np.array([ord(b"a")], dtype="uint8") with tm.assert_produces_warning(None): with catch_warnings(): - filterwarnings('ignore', category=FutureWarning) + filterwarnings("ignore", category=FutureWarning) char_unpacked = self.encode_decode(char, compress=compress) tm.assert_numpy_array_equal(char_unpacked, char) assert char_unpacked.flags.writeable # if this test fails I am sorry because the interpreter is now in a # bad state where b'a' points to 98 == ord(b'b'). - char_unpacked[0] = ord(b'b') + char_unpacked[0] = ord(b"b") # we compare the ord of bytes b'a' with unicode 'a' because the should # always be the same (unless we were able to mutate the shared # character singleton in which case ord(b'a') == ord(b'b'). 
-        assert ord(b'a') == ord('a')
+        assert ord(b"a") == ord("a")
         tm.assert_numpy_array_equal(
-            char_unpacked,
-            np.array([ord(b'b')], dtype='uint8'),
+            char_unpacked, np.array([ord(b"b")], dtype="uint8")
         )
 
     def test_small_strings_no_warn_zlib(self):
         if not _ZLIB_INSTALLED:
-            pytest.skip('no zlib')
-        self._test_small_strings_no_warn('zlib')
+            pytest.skip("no zlib")
+        self._test_small_strings_no_warn("zlib")
 
     def test_small_strings_no_warn_blosc(self):
         if not _BLOSC_INSTALLED:
-            pytest.skip('no blosc')
-        self._test_small_strings_no_warn('blosc')
+            pytest.skip("no blosc")
+        self._test_small_strings_no_warn("blosc")
 
     def test_readonly_axis_blosc(self):
         # GH11880
         if not _BLOSC_INSTALLED:
-            pytest.skip('no blosc')
-        df1 = DataFrame({'A': list('abcd')})
-        df2 = DataFrame(df1, index=[1., 2., 3., 4.])
-        assert 1 in self.encode_decode(df1['A'], compress='blosc')
-        assert 1. in self.encode_decode(df2['A'], compress='blosc')
+            pytest.skip("no blosc")
+        df1 = DataFrame({"A": list("abcd")})
+        df2 = DataFrame(df1, index=[1.0, 2.0, 3.0, 4.0])
+        assert 1 in self.encode_decode(df1["A"], compress="blosc")
+        assert 1.0 in self.encode_decode(df2["A"], compress="blosc")
 
     def test_readonly_axis_zlib(self):
         # GH11880
-        df1 = DataFrame({'A': list('abcd')})
-        df2 = DataFrame(df1, index=[1., 2., 3., 4.])
-        assert 1 in self.encode_decode(df1['A'], compress='zlib')
-        assert 1. in self.encode_decode(df2['A'], compress='zlib')
+        df1 = DataFrame({"A": list("abcd")})
+        df2 = DataFrame(df1, index=[1.0, 2.0, 3.0, 4.0])
+        assert 1 in self.encode_decode(df1["A"], compress="zlib")
+        assert 1.0 in self.encode_decode(df2["A"], compress="zlib")
 
     def test_readonly_axis_blosc_to_sql(self):
         # GH11880
         if not _BLOSC_INSTALLED:
-            pytest.skip('no blosc')
+            pytest.skip("no blosc")
         if not self._SQLALCHEMY_INSTALLED:
-            pytest.skip('no sqlalchemy')
-        expected = DataFrame({'A': list('abcd')})
-        df = self.encode_decode(expected, compress='blosc')
+            pytest.skip("no sqlalchemy")
+        expected = DataFrame({"A": list("abcd")})
+        df = self.encode_decode(expected, compress="blosc")
         eng = self._create_sql_engine("sqlite:///:memory:")
-        df.to_sql('test', eng, if_exists='append')
-        result = pandas.read_sql_table('test', eng, index_col='index')
+        df.to_sql("test", eng, if_exists="append")
+        result = pandas.read_sql_table("test", eng, index_col="index")
         result.index.names = [None]
         assert_frame_equal(expected, result)
 
     def test_readonly_axis_zlib_to_sql(self):
         # GH11880
         if not _ZLIB_INSTALLED:
-            pytest.skip('no zlib')
+            pytest.skip("no zlib")
         if not self._SQLALCHEMY_INSTALLED:
-            pytest.skip('no sqlalchemy')
-        expected = DataFrame({'A': list('abcd')})
-        df = self.encode_decode(expected, compress='zlib')
+            pytest.skip("no sqlalchemy")
+        expected = DataFrame({"A": list("abcd")})
+        df = self.encode_decode(expected, compress="zlib")
         eng = self._create_sql_engine("sqlite:///:memory:")
-        df.to_sql('test', eng, if_exists='append')
-        result = pandas.read_sql_table('test', eng, index_col='index')
+        df.to_sql("test", eng, if_exists="append")
+        result = pandas.read_sql_table("test", eng, index_col="index")
         result.index.names = [None]
         assert_frame_equal(expected, result)
 
 
 @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestEncoding(TestPackers):
-
     def setup_method(self, method):
         super().setup_method(method)
         data = {
-            'A': ['\u2019'] * 1000,
-            'B': np.arange(1000, dtype=np.int32),
-            'C': list(100 * 'abcdefghij'),
-            'D': date_range(datetime.datetime(2015, 4, 1), periods=1000),
-            'E': [datetime.timedelta(days=x) for x in range(1000)],
-            'G': [400] * 1000
+            "A": ["\u2019"] * 1000,
+            "B": np.arange(1000, dtype=np.int32),
+            "C": list(100 * "abcdefghij"),
+            "D": date_range(datetime.datetime(2015, 4, 1), periods=1000),
+            "E": [datetime.timedelta(days=x) for x in range(1000)],
+            "G": [400] * 1000,
         }
         self.frame = {
-            'float': DataFrame({k: data[k] for k in ['A', 'A']}),
-            'int': DataFrame({k: data[k] for k in ['B', 'B']}),
-            'mixed': DataFrame(data),
+            "float": DataFrame({k: data[k] for k in ["A", "A"]}),
+            "int": DataFrame({k: data[k] for k in ["B", "B"]}),
+            "mixed": DataFrame(data),
         }
-        self.utf_encodings = ['utf8', 'utf16', 'utf32']
+        self.utf_encodings = ["utf8", "utf16", "utf32"]
 
     def test_utf(self):
         # GH10581
@@ -845,14 +898,17 @@ def test_utf(self):
     def test_default_encoding(self):
         for frame in self.frame.values():
             result = frame.to_msgpack()
-            expected = frame.to_msgpack(encoding='utf8')
+            expected = frame.to_msgpack(encoding="utf8")
             assert result == expected
             result = self.encode_decode(frame)
             assert_frame_equal(result, frame)
 
 
-files = glob.glob(os.path.join(os.path.dirname(__file__), "data",
-                               "legacy_msgpack", "*", "*.msgpack"))
+files = glob.glob(
+    os.path.join(
+        os.path.dirname(__file__), "data", "legacy_msgpack", "*", "*.msgpack"
+    )
+)
 
 
 @pytest.fixture(params=files)
@@ -873,11 +929,12 @@ class TestMsgpack:
     3. Move the created pickle to "data/legacy_msgpack/" directory.
     """
 
-    minimum_structure = {'series': ['float', 'int', 'mixed',
-                                    'ts', 'mi', 'dup'],
-                         'frame': ['float', 'int', 'mixed', 'mi'],
-                         'index': ['int', 'date', 'period'],
-                         'mi': ['reg2']}
+    minimum_structure = {
+        "series": ["float", "int", "mixed", "ts", "mi", "dup"],
+        "frame": ["float", "int", "mixed", "mi"],
+        "index": ["int", "date", "period"],
+        "mi": ["reg2"],
+    }
 
     def check_min_structure(self, data, version):
         for typ, v in self.minimum_structure.items():
@@ -892,12 +949,13 @@ def compare(self, current_data, all_data, vf, version):
         self.check_min_structure(data, version)
         for typ, dv in data.items():
-            assert typ in all_data, ('unpacked data contains '
-                                     'extra key "{0}"'
-                                     .format(typ))
+            assert (
+                typ in all_data
+            ), "unpacked data contains " 'extra key "{0}"'.format(typ)
             for dt, result in dv.items():
-                assert dt in current_data[typ], ('data["{0}"] contains extra '
-                                                 'key "{1}"'.format(typ, dt))
+                assert (
+                    dt in current_data[typ]
+                ), 'data["{0}"] contains extra ' 'key "{1}"'.format(typ, dt)
                 try:
                     expected = current_data[typ][dt]
                 except KeyError:
@@ -920,21 +978,26 @@ def compare_series_dt_tz(self, result, expected, typ, version):
     def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
         tm.assert_frame_equal(result, expected)
 
-    def test_msgpacks_legacy(self, current_packers_data, all_packers_data,
-                             legacy_packer, datapath):
+    def test_msgpacks_legacy(
+        self, current_packers_data, all_packers_data, legacy_packer, datapath
+    ):
 
         version = os.path.basename(os.path.dirname(legacy_packer))
 
         try:
             with catch_warnings(record=True):
-                self.compare(current_packers_data, all_packers_data,
-                             legacy_packer, version)
+                self.compare(
+                    current_packers_data,
+                    all_packers_data,
+                    legacy_packer,
+                    version,
+                )
         except ImportError:
             # blosc not installed
             pass
 
     def test_msgpack_period_freq(self):
         # https://github.com/pandas-dev/pandas/issues/24135
-        s = Series(np.random.rand(5), index=date_range('20130101', periods=5))
+        s = Series(np.random.rand(5), index=date_range("20130101", periods=5))
         r = read_msgpack(s.to_msgpack())
         repr(r)

From 728e3374497f1534fa3b065242b42417f70356a7 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Thu, 4 Jul 2019 22:43:08 -0400
Subject: [PATCH 09/15] merge test_packers

---
 pandas/tests/io/test_packers.py | 111 --------------------------------
 1 file changed, 111 deletions(-)

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 72030abd9cb91..6bf1aa15d5812 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -56,13 +56,7 @@
 @pytest.fixture(scope="module")
 def current_packers_data():
     # our current version packers data
-<<<<<<< HEAD
-    from pandas.tests.io.generate_legacy_storage_files import (
-        create_msgpack_data,
-    )
-=======
     from pandas.tests.io.generate_legacy_storage_files import create_msgpack_data
->>>>>>> master
 
     return create_msgpack_data()
 
@@ -228,12 +222,7 @@ def test_list_numpy_float_complex(self):
             pytest.skip("numpy can not handle complex128")
 
         x = [np.float32(np.random.rand()) for i in range(5)] + [
-<<<<<<< HEAD
-            np.complex128(np.random.rand() + 1j * np.random.rand())
-            for i in range(5)
-=======
             np.complex128(np.random.rand() + 1j * np.random.rand()) for i in range(5)
->>>>>>> master
         ]
         x_rec = self.encode_decode(x)
         assert np.allclose(x, x_rec)
@@ -273,14 +262,7 @@ def test_dict_numpy_float(self):
         tm.assert_almost_equal(x, x_rec)
 
     def test_dict_numpy_complex(self):
-<<<<<<< HEAD
-        x = {
-            "foo": np.complex128(1.0 + 1.0j),
-            "bar": np.complex128(2.0 + 2.0j),
-        }
-=======
         x = {"foo": np.complex128(1.0 + 1.0j), "bar": np.complex128(2.0 + 2.0j)}
->>>>>>> master
         x_rec = self.encode_decode(x)
         tm.assert_dict_equal(x, x_rec)
 
@@ -300,13 +282,7 @@ def test_numpy_array_float(self):
     def test_numpy_array_complex(self):
         x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128)
         x_rec = self.encode_decode(x)
-<<<<<<< HEAD
-        assert (
-            all(map(lambda x, y: x == y, x, x_rec)) and x.dtype == x_rec.dtype
-        )
-=======
         assert all(map(lambda x, y: x == y, x, x_rec)) and x.dtype == x_rec.dtype
->>>>>>> master
 
     def test_list_mixed(self):
         x = [1.0, np.float32(3.5), np.complex128(4.25), "foo", np.bool_(1)]
@@ -363,15 +339,7 @@ def test_periods(self):
 
     def test_intervals(self):
         # 19967
-<<<<<<< HEAD
-        for i in [
-            Interval(0, 1),
-            Interval(0, 1, "left"),
-            Interval(10, 25.0, "right"),
-        ]:
-=======
         for i in [Interval(0, 1), Interval(0, 1, "left"), Interval(10, 25.0, "right")]:
->>>>>>> master
             i_rec = self.encode_decode(i)
             assert i == i_rec
 
@@ -558,16 +526,7 @@ def test_multi(self):
             assert_frame_equal(self.frame[k], i_rec[k])
 
         packed_items = tuple(
-<<<<<<< HEAD
-            [
-                self.frame["float"],
-                self.frame["float"].A,
-                self.frame["float"].B,
-                None,
-            ]
-=======
             [self.frame["float"], self.frame["float"].A, self.frame["float"].B, None]
->>>>>>> master
         )
         l_rec = self.encode_decode(packed_items)
         check_arbitrary(packed_items, l_rec)
@@ -646,21 +605,6 @@ def test_sparse_series(self):
         s = tm.makeStringSeries()
         s[3:5] = np.nan
         ss = s.to_sparse()
-<<<<<<< HEAD
-        self._check_roundtrip(
-            ss, tm.assert_series_equal, check_series_type=True
-        )
-
-        ss2 = s.to_sparse(kind="integer")
-        self._check_roundtrip(
-            ss2, tm.assert_series_equal, check_series_type=True
-        )
-
-        ss3 = s.to_sparse(fill_value=0)
-        self._check_roundtrip(
-            ss3, tm.assert_series_equal, check_series_type=True
-        )
-=======
         self._check_roundtrip(ss, tm.assert_series_equal, check_series_type=True)
 
         ss2 = s.to_sparse(kind="integer")
@@ -668,7 +612,6 @@ def test_sparse_series(self):
 
         ss3 = s.to_sparse(fill_value=0)
         self._check_roundtrip(ss3, tm.assert_series_equal, check_series_type=True)
->>>>>>> master
 
     def test_sparse_frame(self):
 
@@ -680,21 +623,10 @@ def test_sparse_frame(self):
         self._check_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True)
 
         ss2 = s.to_sparse(kind="integer")
-<<<<<<< HEAD
-        self._check_roundtrip(
-            ss2, tm.assert_frame_equal, check_frame_type=True
-        )
-
-        ss3 = s.to_sparse(fill_value=0)
-        self._check_roundtrip(
-            ss3, tm.assert_frame_equal, check_frame_type=True
-        )
-=======
         self._check_roundtrip(ss2, tm.assert_frame_equal, check_frame_type=True)
 
         ss3 = s.to_sparse(fill_value=0)
         self._check_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True)
->>>>>>> master
 
 
 @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
@@ -751,13 +683,7 @@ def test_compression_blosc(self):
             pytest.skip("no blosc")
         self._test_compression("blosc")
 
-<<<<<<< HEAD
-    def _test_compression_warns_when_decompress_caches(
-        self, monkeypatch, compress
-    ):
-=======
     def _test_compression_warns_when_decompress_caches(self, monkeypatch, compress):
->>>>>>> master
         not_garbage = []
         control = []  # copied data
@@ -818,24 +744,12 @@ def decompress(ob):
     def test_compression_warns_when_decompress_caches_zlib(self, monkeypatch):
         if not _ZLIB_INSTALLED:
             pytest.skip("no zlib")
-<<<<<<< HEAD
-        self._test_compression_warns_when_decompress_caches(
-            monkeypatch, "zlib"
-        )
-=======
         self._test_compression_warns_when_decompress_caches(monkeypatch, "zlib")
->>>>>>> master
 
     def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch):
         if not _BLOSC_INSTALLED:
             pytest.skip("no blosc")
-<<<<<<< HEAD
-        self._test_compression_warns_when_decompress_caches(
-            monkeypatch, "blosc"
-        )
-=======
         self._test_compression_warns_when_decompress_caches(monkeypatch, "blosc")
->>>>>>> master
 
     def _test_small_strings_no_warn(self, compress):
         empty = np.array([], dtype="uint8")
@@ -863,13 +777,7 @@ def _test_small_strings_no_warn(self, compress):
         # always be the same (unless we were able to mutate the shared
         # character singleton in which case ord(b'a') == ord(b'b').
         assert ord(b"a") == ord("a")
-<<<<<<< HEAD
-        tm.assert_numpy_array_equal(
-            char_unpacked, np.array([ord(b"b")], dtype="uint8")
-        )
-=======
         tm.assert_numpy_array_equal(char_unpacked, np.array([ord(b"b")], dtype="uint8"))
->>>>>>> master
 
     def test_small_strings_no_warn_zlib(self):
         if not _ZLIB_INSTALLED:
@@ -962,13 +870,7 @@ def test_default_encoding(self):
 
 
 files = glob.glob(
-<<<<<<< HEAD
-    os.path.join(
-        os.path.dirname(__file__), "data", "legacy_msgpack", "*", "*.msgpack"
-    )
-=======
     os.path.join(os.path.dirname(__file__), "data", "legacy_msgpack", "*", "*.msgpack")
->>>>>>> master
 )
@@ -1010,15 +912,9 @@ def compare(self, current_data, all_data, vf, version):
         self.check_min_structure(data, version)
         for typ, dv in data.items():
-<<<<<<< HEAD
-            assert (
-                typ in all_data
-            ), "unpacked data contains " 'extra key "{0}"'.format(typ)
-=======
             assert typ in all_data, "unpacked data contains " 'extra key "{0}"'.format(
                 typ
             )
->>>>>>> master
             for dt, result in dv.items():
                 assert (
                     dt in current_data[typ]
@@ -1054,14 +950,7 @@ def test_msgpacks_legacy(
         try:
             with catch_warnings(record=True):
                 self.compare(
-<<<<<<< HEAD
-                    current_packers_data,
-                    all_packers_data,
-                    legacy_packer,
-                    version,
-=======
                     current_packers_data, all_packers_data, legacy_packer, version
->>>>>>> master
                 )
         except ImportError:
             # blosc not installed

From 0a3488f296325397fbf8a6bf270493e8e4dc9bd4 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Fri, 5 Jul 2019 00:34:57 -0400
Subject: [PATCH 10/15] added FileNotFoundError for read_msgpack

---
 pandas/io/packers.py            | 2 ++
 pandas/tests/io/test_packers.py | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 423248dcb971e..d89576a9252ef 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -215,6 +215,8 @@ def read(fh):
     if exists:
         with open(path_or_buf, "rb") as fh:
             return read(fh)
+    else:
+        raise FileNotFoundError("path_or_buf is invalid or was not found")
 
     if isinstance(path_or_buf, bytes):
         # treat as a binary-like
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 6bf1aa15d5812..f2591dc561d08 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -165,15 +165,17 @@ def __init__(self):
                 self.read = 0
 
         msg = "Invalid file path or buffer object type: "
+        invalid_path = os.path.join("nonexistent_dir", "df.msgpack")
         with pytest.raises(ValueError, match=msg.format("NoneType")):
             read_msgpack(path_or_buf=None)
         with pytest.raises(ValueError, match=msg.format("dict")):
             read_msgpack(path_or_buf={})
         with pytest.raises(ValueError, match=msg.format(r".*\.A")):
             read_msgpack(path_or_buf=A())
+        with pytest.raises(FileNotFoundError):
+            read_msgpack(path_or_buf=invalid_path)
         with pytest.raises(FileNotFoundError):
             df = tm.makeDataFrame()
-            invalid_path = os.path.join("nonexistent_dir", "df.msgpack")
             df.to_msgpack(path_or_buf=invalid_path)

From 21d46085af4778a6df5b533d92555cd480548f70 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Fri, 5 Jul 2019 14:26:02 -0400
Subject: [PATCH 11/15] updated to expect FileNotFoundError

---
 pandas/io/packers.py           | 12 ++++--------
 pandas/tests/io/test_common.py |  4 ++--
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index d89576a9252ef..132274ec2554c 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -143,7 +143,7 @@ def writer(fh):
             with open(path_or_buf, mode) as fh:
                 writer(fh)
         except FileNotFoundError as error:
-            error.strerror = "path_or_buf is invalid or was not found"
+            error.strerror = "File b'{}' does not exist".format(path_or_buf)
             raise error
     elif path_or_buf is None:
         buf = BytesIO()
@@ -208,15 +208,11 @@ def read(fh):
     # see if we have an actual file
     if isinstance(path_or_buf, str):
         try:
-            exists = os.path.exists(path_or_buf)
-        except (TypeError, ValueError):
-            exists = False
-
-        if exists:
             with open(path_or_buf, "rb") as fh:
                 return read(fh)
-        else:
-            raise FileNotFoundError("path_or_buf is invalid or was not found")
+        except FileNotFoundError as error:
+            error.strerror = "File b'{}' does not exist".format(path_or_buf)
+            raise error
 
     if isinstance(path_or_buf, bytes):
         # treat as a binary-like
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 426698bfa1e94..8e09e96fbd471 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -142,7 +142,7 @@ def test_iterator(self):
             (pd.read_stata, "os", FileNotFoundError, "dta"),
             (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
             (pd.read_json, "os", ValueError, "json"),
-            (pd.read_msgpack, "os", ValueError, "mp"),
+            (pd.read_msgpack, "os", FileNotFoundError, "mp"),
             (pd.read_pickle, "os", FileNotFoundError, "pickle"),
         ],
     )
@@ -177,7 +177,7 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
             (pd.read_stata, "os", FileNotFoundError, "dta"),
             (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
             (pd.read_json, "os", ValueError, "json"),
-            (pd.read_msgpack, "os", ValueError, "mp"),
+            (pd.read_msgpack, "os", FileNotFoundError, "mp"),
             (pd.read_pickle, "os", FileNotFoundError, "pickle"),
         ],
     )

From e137c500bb9cdef0f635a9334a456fc21ccac359 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Fri, 5 Jul 2019 15:12:00 -0400
Subject: [PATCH 12/15] no longer modifying exception

---
 pandas/io/packers.py            | 12 ++++++------
 pandas/tests/io/test_packers.py |  6 ++----
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 132274ec2554c..2e411fb07885f 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -142,9 +142,9 @@ def writer(fh):
         try:
             with open(path_or_buf, mode) as fh:
                 writer(fh)
-        except FileNotFoundError as error:
-            error.strerror = "File b'{}' does not exist".format(path_or_buf)
-            raise error
+        except FileNotFoundError:
+            msg = "File b'{}' does not exist".format(path_or_buf)
+            raise FileNotFoundError(msg)
     elif path_or_buf is None:
         buf = BytesIO()
         writer(buf)
@@ -210,9 +210,9 @@ def read(fh):
         try:
             with open(path_or_buf, "rb") as fh:
                 return read(fh)
-        except FileNotFoundError as error:
-            error.strerror = "File b'{}' does not exist".format(path_or_buf)
-            raise error
+        except FileNotFoundError:
+            msg = "File b'{}' does not exist".format(path_or_buf)
+            raise FileNotFoundError(msg)
 
     if isinstance(path_or_buf, bytes):
         # treat as a binary-like
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index f2591dc561d08..f2ed986563425 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -166,17 +166,15 @@ def __init__(self):
 
         msg = "Invalid file path or buffer object type: "
         invalid_path = os.path.join("nonexistent_dir", "df.msgpack")
+        path_msg = "File b'{}' does not exist".format(invalid_path)
         with pytest.raises(ValueError, match=msg.format("NoneType")):
             read_msgpack(path_or_buf=None)
         with pytest.raises(ValueError, match=msg.format("dict")):
             read_msgpack(path_or_buf={})
         with pytest.raises(ValueError, match=msg.format(r".*\.A")):
             read_msgpack(path_or_buf=A())
-        with pytest.raises(FileNotFoundError):
+        with pytest.raises(FileNotFoundError, match=path_msg):
             read_msgpack(path_or_buf=invalid_path)
-        with pytest.raises(FileNotFoundError):
-            df = tm.makeDataFrame()
-            df.to_msgpack(path_or_buf=invalid_path)

From 0d7c4d50ad3b168293df20618470f338581342a1 Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Fri, 5 Jul 2019 16:25:43 -0400
Subject: [PATCH 13/15] fix windows error

---
 pandas/tests/io/test_packers.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index f2ed986563425..fb1f657905be7 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -166,14 +166,13 @@ def __init__(self):
 
         msg = "Invalid file path or buffer object type: "
         invalid_path = os.path.join("nonexistent_dir", "df.msgpack")
-        path_msg = "File b'{}' does not exist".format(invalid_path)
         with pytest.raises(ValueError, match=msg.format("NoneType")):
             read_msgpack(path_or_buf=None)
         with pytest.raises(ValueError, match=msg.format("dict")):
             read_msgpack(path_or_buf={})
         with pytest.raises(ValueError, match=msg.format(r".*\.A")):
             read_msgpack(path_or_buf=A())
-        with pytest.raises(FileNotFoundError, match=path_msg):
+        with pytest.raises(FileNotFoundError, match="does not exist"):
             read_msgpack(path_or_buf=invalid_path)

From c935271685f369d9d71c9ff99fab3b8b9b7447ab Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Fri, 5 Jul 2019 17:14:50 -0400
Subject: [PATCH 14/15] added whatsnew entry

---
 doc/source/whatsnew/v0.25.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index ab242ece98181..5eb934dc8f7b3 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -1094,6 +1094,7 @@ I/O
 - Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`).
 - Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. `PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`)
 - Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`)
+- Improved :meth:`to_msgpack` and :meth:`read_msgpack` by raising ``FileNotFoundError`` for invalid paths. Also fixed docstring for :meth:`to_msgpack`; return type is bytes when no path is specified (:issue:`27160`)
 
 Plotting
 ^^^^^^^^

From 9a379a0cee495e70002d5dc137ccdd39d22ae59e Mon Sep 17 00:00:00 2001
From: Joshua Maxcy
Date: Fri, 5 Jul 2019 18:08:32 -0400
Subject: [PATCH 15/15] retry whatsnew entry

---
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 5eb934dc8f7b3..a8e3ee8042b0f 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -1094,7 +1094,7 @@ I/O
 - Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`).
 - Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. `PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`)
 - Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`)
-- Improved :meth:`to_msgpack` and :meth:`read_msgpack` by raising ``FileNotFoundError`` for invalid paths. Also fixed docstring for :meth:`to_msgpack`; return type is bytes when no path is specified (:issue:`27160`)
+- Bug in :meth:`to_msgpack` and :meth:`read_msgpack` which would raise a ``ValueError`` rather than a ``FileNotFoundError`` for an invalid path (:issue:`27160`)
 
 Plotting
 ^^^^^^^^
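
A quick way for reviewers to exercise what this series changes, end to end. This is a sketch and not part of the patches: it assumes a pandas 0.25-era environment (to_msgpack/read_msgpack were already deprecated there and were removed in pandas 1.0, hence the FutureWarning filter), and "nonexistent_dir/df.msgpack" is just an illustrative bad path.

    import os
    import warnings

    import pandas as pd
    import pandas.util.testing as tm

    # Silence the msgpack deprecation warnings emitted on 0.25.x.
    warnings.simplefilter("ignore", FutureWarning)

    df = tm.makeDataFrame()

    # With no path, to_msgpack returns the serialized frame as bytes,
    # which is what the patch-01 docstring fix documents.
    buf = df.to_msgpack()
    assert isinstance(buf, bytes)
    tm.assert_frame_equal(pd.read_msgpack(buf), df)

    # After this series, a path into a missing directory raises
    # FileNotFoundError from both the writer and the reader; read_msgpack
    # previously let this surface as a ValueError instead.
    bad_path = os.path.join("nonexistent_dir", "df.msgpack")
    for attempt in (lambda: df.to_msgpack(bad_path),
                    lambda: pd.read_msgpack(bad_path)):
        try:
            attempt()
        except FileNotFoundError as err:
            print("raised as expected:", err)

Both failures print rather than re-raise here so the script runs to completion; in the test suite the same expectations live in test_packers.py and test_common.py.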