diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 20fd582179dc6..51ad01dd6b369 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -115,6 +115,7 @@ class ExtensionArray: # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ + @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): """ @@ -286,6 +287,7 @@ def __iter__(self): # ------------------------------------------------------------------------ # Required attributes # ------------------------------------------------------------------------ + @property def dtype(self) -> ExtensionDtype: """ @@ -319,6 +321,7 @@ def nbytes(self) -> int: # ------------------------------------------------------------------------ # Additional Methods # ------------------------------------------------------------------------ + def astype(self, dtype, copy=True): """ Cast to a NumPy array with 'dtype'. @@ -479,8 +482,7 @@ def dropna(self): def shift( self, periods: int = 1, - fill_value: object = None, - ) -> ABCExtensionArray: + fill_value: object = None) -> ABCExtensionArray: """ Shift values by desired number. @@ -836,6 +838,7 @@ def copy(self, deep: bool = False) -> ABCExtensionArray: # ------------------------------------------------------------------------ # Printing # ------------------------------------------------------------------------ + def __repr__(self): from pandas.io.formats.printing import format_object_summary diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 91be320a3e674..1b4e001620286 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -480,7 +480,7 @@ def first_not_none(values): # if we have date/time like in the original, then coerce dates # as we are stacking can easily have object dtypes here so = self._selected_obj - if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()): + if so.ndim == 2 and so.dtypes.apply(is_datetimelike).any(): result = result.apply( lambda x: to_numeric(x, errors='ignore')) date_cols = self._selected_obj.select_dtypes( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 7fe34279c0482..592c385dd87ec 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1027,7 +1027,7 @@ def set(self, item, value): value_is_extension_type = (is_extension_type(value) or is_extension_array_dtype(value)) - # categorical/spares/datetimetz + # categorical/sparse/datetimetz if value_is_extension_type: def value_getitem(placement): diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 413c11ba2f9fe..be318ede2df4a 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -1070,47 +1070,41 @@ def test_encoding(self): result = store.select('df', Term('columns=A', encoding='ascii')) tm.assert_frame_equal(result, expected) - def test_latin_encoding(self): - - values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'], - [b'E\xc9, 17', b'a', b'b', b'c'], - [b'EE, 17', b'', b'a', b'b', b'c'], - [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'], - [b'', b'a', b'b', b'c'], - [b'\xf8\xfc', b'a', b'b', b'c'], - [b'A\xf8\xfc', b'', b'a', b'b', b'c'], - [np.nan, b'', b'b', b'c'], - [b'A\xf8\xfc', np.nan, b'', b'b', b'c']] - - def _try_decode(x, encoding='latin-1'): - try: - return x.decode(encoding) - except AttributeError: - return x - # not sure how to remove latin-1 from code in python 2 and 3 - values = [[_try_decode(x) for x in y] for y in values] - - examples = [] - for dtype in ['category', object]: - for val in values: - examples.append(pd.Series(val, dtype=dtype)) - - def roundtrip(s, key='data', encoding='latin-1', nan_rep=''): - with ensure_clean_path(self.path) as store: - s.to_hdf(store, key, format='table', encoding=encoding, - nan_rep=nan_rep) - retr = read_hdf(store, key) - s_nan = s.replace(nan_rep, np.nan) - if is_categorical_dtype(s_nan): - assert is_categorical_dtype(retr) - assert_series_equal(s_nan, retr, check_dtype=False, - check_categorical=False) - else: - assert_series_equal(s_nan, retr) - - for s in examples: - roundtrip(s) + @pytest.mark.parametrize('val', [ + [b'E\xc9, 17', b'', b'a', b'b', b'c'], + [b'E\xc9, 17', b'a', b'b', b'c'], + [b'EE, 17', b'', b'a', b'b', b'c'], + [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'], + [b'', b'a', b'b', b'c'], + [b'\xf8\xfc', b'a', b'b', b'c'], + [b'A\xf8\xfc', b'', b'a', b'b', b'c'], + [np.nan, b'', b'b', b'c'], + [b'A\xf8\xfc', np.nan, b'', b'b', b'c'] + ]) + @pytest.mark.parametrize('dtype', ['category', object]) + def test_latin_encoding(self, dtype, val): + enc = 'latin-1' + nan_rep = '' + key = 'data' + + val = [x.decode(enc) if isinstance(x, bytes) else x for x in val] + ser = pd.Series(val, dtype=dtype) + + with ensure_clean_path(self.path) as store: + ser.to_hdf(store, key, format='table', encoding=enc, + nan_rep=nan_rep) + retr = read_hdf(store, key) + + s_nan = ser.replace(nan_rep, np.nan) + + if is_categorical_dtype(s_nan): + assert is_categorical_dtype(retr) + assert_series_equal(s_nan, retr, check_dtype=False, + check_categorical=False) + else: + assert_series_equal(s_nan, retr) + # FIXME: don't leave commented-out # fails: # for x in examples: # roundtrip(s, nan_rep=b'\xf8\xfc')