From 4c3c6bd3028806510c442a8f61b027728aa48dd3 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 10:31:29 +0200 Subject: [PATCH 01/28] Finished groupby.pyx --- pandas/_libs/groupby.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 0e5eaa3b7dca6..49fb1ca50a1aa 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -753,8 +753,7 @@ def group_quantile(ndarray[float64_t] out, assert values.shape[0] == N if not (0 <= q <= 1): - raise ValueError("'q' must be between 0 and 1. Got" - " '{}' instead".format(q)) + raise ValueError(f"'q' must be between 0 and 1. Got '{q}' instead") inter_methods = { 'linear': INTERPOLATION_LINEAR, From e64e327de3fa849b7e6a0fb5f578ce8acc7be0c9 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 10:34:24 +0200 Subject: [PATCH 02/28] Finished hashing.pyx --- pandas/_libs/hashing.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 6b27b2204e75e..911036a039adb 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -47,8 +47,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): k = key.encode(encoding) kb = k if len(k) != 16: - raise ValueError("key should be a 16-byte string encoded, " - "got {key} (len {klen})".format(key=k, klen=len(k))) + raise ValueError(f"key should be a 16-byte string encoded, " + f"got {k} (len {len(k)})") n = len(arr) @@ -67,9 +67,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): data = str(val).encode(encoding) else: - raise TypeError("{val} of type {typ} is not a valid type " - "for hashing, must be string or null" - .format(val=val, typ=type(val))) + raise TypeError(f"{val} of type {type(val)} is not a valid type " + f"for hashing, must be string or null" + ) l = len(data) lens[i] = l From ba7c6be1b0321af9fa3782ce0b98486616358a70 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 10:42:45 +0200 Subject: [PATCH 03/28] Finished index.pyx --- pandas/_libs/index.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index cc114b48a5b53..92937ae56817c 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -109,7 +109,7 @@ cdef class IndexEngine: Py_ssize_t loc if is_definitely_invalid_key(val): - raise TypeError("'{val}' is an invalid key".format(val=val)) + raise TypeError(f"'{val}' is an invalid key") if self.over_size_threshold and self.is_monotonic_increasing: if not self.is_unique: @@ -556,8 +556,8 @@ cpdef convert_scalar(ndarray arr, object value): pass elif value is None or value != value: return np.datetime64("NaT", "ns") - raise ValueError("cannot set a Timestamp with a non-timestamp {typ}" - .format(typ=type(value).__name__)) + raise ValueError(f"cannot set a Timestamp with a non-timestamp " + f"{type(value).__name__}") elif arr.descr.type_num == NPY_TIMEDELTA: if util.is_array(value): @@ -573,8 +573,8 @@ cpdef convert_scalar(ndarray arr, object value): pass elif value is None or value != value: return np.timedelta64("NaT", "ns") - raise ValueError("cannot set a Timedelta with a non-timedelta {typ}" - .format(typ=type(value).__name__)) + raise ValueError(f"cannot set a Timedelta with a non-timedelta " + f"{type(value).__name__}") if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and not issubclass(arr.dtype.type, 
np.bool_)): @@ -677,7 +677,7 @@ cdef class BaseMultiIndexCodesEngine: # Index._get_fill_indexer), sort (integer representations of) keys: order = np.argsort(lab_ints) lab_ints = lab_ints[order] - indexer = (getattr(self._base, 'get_{}_indexer'.format(method)) + indexer = (getattr(self._base, f'get_{method}_indexer') (self, lab_ints, limit=limit)) indexer = indexer[order] else: @@ -687,7 +687,7 @@ cdef class BaseMultiIndexCodesEngine: def get_loc(self, object key): if is_definitely_invalid_key(key): - raise TypeError("'{key}' is an invalid key".format(key=key)) + raise TypeError(f"'{key}' is an invalid key") if not isinstance(key, tuple): raise KeyError(key) try: From 590c6a01b96923440f8cdac656e09601c35deec9 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 10:44:17 +0200 Subject: [PATCH 04/28] Finished internals.pyx --- pandas/_libs/internals.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index db9f16d46e48c..ecd090de500da 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -61,7 +61,7 @@ cdef class BlockPlacement: else: v = self._as_array - return '%s(%r)' % (self.__class__.__name__, v) + return f'{self.__class__.__name__}({v})' def __repr__(self) -> str: return str(self) From 89bdecc10d24335b7a8901fb139aed561ba0e33c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 10:55:43 +0200 Subject: [PATCH 05/28] Finished interval.pyx --- pandas/_libs/interval.pyx | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 2bd38524852ec..814af744f001e 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -179,8 +179,8 @@ cdef class IntervalMixin: When `other` is not closed exactly the same as self. """ if self.closed != other.closed: - msg = "'{}.closed' is '{}', expected '{}'." - raise ValueError(msg.format(name, other.closed, self.closed)) + msg = f"'{name}.closed' is '{other.closed}', expected '{self.closed}'." 
+ raise ValueError(msg) cdef _interval_like(other): @@ -308,17 +308,16 @@ cdef class Interval(IntervalMixin): self._validate_endpoint(right) if closed not in _VALID_CLOSED: - msg = "invalid option for 'closed': {closed}".format(closed=closed) + msg = f"invalid option for 'closed': {closed}" raise ValueError(msg) if not left <= right: raise ValueError('left side of interval must be <= right side') if (isinstance(left, Timestamp) and not tz_compare(left.tzinfo, right.tzinfo)): # GH 18538 - msg = ("left and right must have the same time zone, got " - "'{left_tz}' and '{right_tz}'") - raise ValueError(msg.format(left_tz=left.tzinfo, - right_tz=right.tzinfo)) + msg = (f"left and right must have the same time zone, got " + f"'{left.tzinfo}' and '{right.tzinfo}'") + raise ValueError(msg) self.left = left self.right = right self.closed = closed @@ -359,8 +358,7 @@ cdef class Interval(IntervalMixin): name = type(self).__name__ other = type(other).__name__ op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op] - raise TypeError('unorderable types: {name}() {op} {other}()' - .format(name=name, op=op_str, other=other)) + raise TypeError(f'unorderable types: {name}() {op_str} {other}()') def __reduce__(self): args = (self.left, self.right, self.closed) @@ -381,8 +379,7 @@ cdef class Interval(IntervalMixin): left, right = self._repr_base() name = type(self).__name__ - repr_str = '{name}({left!r}, {right!r}, closed={closed!r})'.format( - name=name, left=left, right=right, closed=self.closed) + repr_str = f'{name}({left}, {right}, closed={self.closed})' return repr_str def __str__(self) -> str: @@ -390,8 +387,7 @@ cdef class Interval(IntervalMixin): left, right = self._repr_base() start_symbol = '[' if self.closed_left else '(' end_symbol = ']' if self.closed_right else ')' - return '{start}{left}, {right}{end}'.format( - start=start_symbol, left=left, right=right, end=end_symbol) + return f'{start_symbol}{left}, {right}{end_symbol}' def __add__(self, y): if isinstance(y, numbers.Number): @@ -477,8 +473,8 @@ cdef class Interval(IntervalMixin): False """ if not isinstance(other, Interval): - msg = '`other` must be an Interval, got {other}' - raise TypeError(msg.format(other=type(other).__name__)) + msg = f'`other` must be an Interval, got {type(other).__name__}' + raise TypeError(msg) # equality is okay if both endpoints are closed (overlap at a point) op1 = le if (self.closed_left and other.closed_right) else lt @@ -529,8 +525,8 @@ def intervals_to_interval_bounds(ndarray intervals, continue if not isinstance(interval, Interval): - raise TypeError("type {typ} with value {iv} is not an interval" - .format(typ=type(interval), iv=interval)) + raise TypeError(f"type {type(interval)} with value " + f"{interval} is not an interval") left[i] = interval.left right[i] = interval.right From dd3e437c8808494c67a2bf519626359fd5814d8e Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:01:52 +0200 Subject: [PATCH 06/28] Finished lib.pyx --- pandas/_libs/lib.pyx | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a14efd3313eaf..93cd373e0d0b1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1219,8 +1219,7 @@ def infer_dtype(value: object, skipna: object=None) -> str: return value # its ndarray like but we can't handle - raise ValueError("cannot infer type for {typ}" - .format(typ=type(value))) + raise ValueError(f"cannot infer type for {type(value)}") else: if not isinstance(value, list): @@ 
-1497,9 +1496,8 @@ cdef class Validator: return self.is_valid(value) or self.is_valid_null(value) cdef bint is_value_typed(self, object value) except -1: - raise NotImplementedError( - '{typ} child class must define is_value_typed' - .format(typ=type(self).__name__)) + raise NotImplementedError(f'{type(self).__name__} child class ' + f'must define is_value_typed') cdef bint is_valid_null(self, object value) except -1: return value is None or util.is_nan(value) @@ -1635,9 +1633,8 @@ cdef class TemporalValidator(Validator): return self.is_value_typed(value) or self.is_valid_null(value) cdef bint is_valid_null(self, object value) except -1: - raise NotImplementedError( - '{typ} child class must define is_valid_null' - .format(typ=type(self).__name__)) + raise NotImplementedError(f'{type(self).__name__} child class ' + f'must define is_valid_null') cdef inline bint is_valid_skipna(self, object value) except -1: cdef: @@ -1926,7 +1923,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, seen.float_ = True except (TypeError, ValueError) as e: if not seen.coerce_numeric: - raise type(e)(str(e) + " at position {pos}".format(pos=i)) + raise type(e)(str(e) + f" at position {i}") elif "uint64" in str(e): # Exception from check functions. raise From 6fc5aca0bcd94ca7731363f5dd064585f5379c6f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:04:02 +0200 Subject: [PATCH 07/28] Finished ops.pyx --- pandas/_libs/ops.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index bdafcd646a4c8..abe1484e3763d 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -123,8 +123,7 @@ def vec_compare(object[:] left, object[:] right, object op): int flag if n != len(right): - raise ValueError('Arrays were different lengths: {n} vs {nright}' - .format(n=n, nright=len(right))) + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') if op is operator.lt: flag = Py_LT @@ -224,8 +223,7 @@ def vec_binop(object[:] left, object[:] right, object op): object[:] result if n != len(right): - raise ValueError('Arrays were different lengths: {n} vs {nright}' - .format(n=n, nright=len(right))) + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') result = np.empty(n, dtype=object) From 3d193fa2a8ca237aed059fd146a4efe00f4c687f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:11:44 +0200 Subject: [PATCH 08/28] Finished parsers.pyx --- pandas/_libs/parsers.pyx | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 8b9842ba087a5..601b81556be0e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -637,19 +637,19 @@ cdef class TextReader: source = zip_file.open(file_name) elif len(zip_names) == 0: - raise ValueError('Zero files found in compressed ' - 'zip file %s', source) + raise ValueError(f'Zero files found in compressed ' + f'zip file {source}') else: - raise ValueError('Multiple files found in compressed ' - 'zip file %s', str(zip_names)) + raise ValueError(f'Multiple files found in compressed ' + f'zip file {zip_names}') elif self.compression == 'xz': if isinstance(source, str): source = _get_lzma_file(lzma)(source, 'rb') else: source = _get_lzma_file(lzma)(filename=source) else: - raise ValueError('Unrecognized compression type: %s' % - self.compression) + raise ValueError(f'Unrecognized compression type: ' + f'{self.compression}') if 
b'utf-16' in (self.encoding or b''): # we need to read utf-16 through UTF8Recoder. @@ -703,8 +703,8 @@ cdef class TextReader: self.parser.cb_io = &buffer_rd_bytes self.parser.cb_cleanup = &del_rd_source else: - raise IOError('Expected file path name or file-like object,' - ' got %s type' % type(source)) + raise IOError(f'Expected file path name or file-like object, ' + f'got {type(source)} type') cdef _get_header(self): # header is now a list of lists, so field_count should use header[0] @@ -744,8 +744,8 @@ cdef class TextReader: msg = "[%s], len of %d," % ( ','.join(str(m) for m in msg), len(msg)) raise ParserError( - 'Passed header=%s but only %d lines in file' - % (msg, self.parser.lines)) + f'Passed header={msg} but only ' + f'{self.parser.lines} lines in file') else: field_count = self.parser.line_fields[hr] @@ -779,7 +779,7 @@ cdef class TextReader: if not self.has_mi_columns and self.mangle_dupe_cols: while count > 0: counts[name] = count + 1 - name = '%s.%d' % (name, count) + name = f'{name}.{count}' count = counts.get(name, 0) if old_name == '': @@ -1662,7 +1662,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, char *data ndarray result - result = np.empty(line_end - line_start, dtype='|S%d' % width) + result = np.empty(line_end - line_start, dtype=f'|S{width}') data = result.data with nogil: @@ -2176,8 +2176,8 @@ def _concatenate_chunks(list chunks): if warning_columns: warning_names = ','.join(warning_columns) warning_message = " ".join([ - "Columns (%s) have mixed types." % warning_names, - "Specify dtype option on import or set low_memory=False." + f"Columns ({warning_names}) have mixed types." + f"Specify dtype option on import or set low_memory=False." ]) warnings.warn(warning_message, DtypeWarning, stacklevel=8) return result From b09066b057c17551c28ed95f7891befb8be8dffc Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:18:26 +0200 Subject: [PATCH 09/28] Finished reduction.pyx --- pandas/_libs/reduction.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 60c65a22603cd..477c7a6bcd089 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -92,8 +92,7 @@ cdef class Reducer: if dummy.dtype != self.arr.dtype: raise ValueError('Dummy array must be same dtype') if len(dummy) != self.chunksize: - raise ValueError('Dummy array must be length {length}' - .format(length=self.chunksize)) + raise ValueError(f'Dummy array must be length {self.chunksize}') return dummy, typ, index, ityp From da6dcf0ef767b1cc8a9a9d68edd510257ed74227 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:22:43 +0200 Subject: [PATCH 10/28] Finished sparse.pyx --- pandas/_libs/sparse.pyx | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 1944f9592829c..578995a3eb3b6 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -53,7 +53,7 @@ cdef class IntIndex(SparseIndex): def __repr__(self) -> str: output = 'IntIndex\n' - output += 'Indices: %s\n' % repr(self.indices) + output += f'Indices: {repr(self.indices)}\n' return output @property @@ -72,9 +72,8 @@ cdef class IntIndex(SparseIndex): """ if self.npoints > self.length: - msg = ("Too many indices. Expected " - "{exp} but found {act}").format( - exp=self.length, act=self.npoints) + msg = (f"Too many indices. 
Expected " + f"{self.length} but found {self.npoints}") raise ValueError(msg) # Indices are vacuously ordered and non-negative @@ -343,8 +342,8 @@ cdef class BlockIndex(SparseIndex): def __repr__(self) -> str: output = 'BlockIndex\n' - output += 'Block locations: %s\n' % repr(self.blocs) - output += 'Block lengths: %s' % repr(self.blengths) + output += f'Block locations: {repr(self.blocs)}\n' + output += f'Block lengths: {repr(self.blengths)}' return output @@ -380,15 +379,14 @@ cdef class BlockIndex(SparseIndex): if i < self.nblocks - 1: if blocs[i] + blengths[i] > blocs[i + 1]: - raise ValueError('Block {idx} overlaps'.format(idx=i)) + raise ValueError(f'Block {i} overlaps') else: if blocs[i] + blengths[i] > self.length: - raise ValueError('Block {idx} extends beyond end' - .format(idx=i)) + raise ValueError(f'Block {i} extends beyond end') # no zero-length blocks if blengths[i] == 0: - raise ValueError('Zero-length block {idx}'.format(idx=i)) + raise ValueError(f'Zero-length block {i}') def equals(self, other): if not isinstance(other, BlockIndex): From 2667b092b472069d50d38cb82d489196c49cdc9c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:28:02 +0200 Subject: [PATCH 11/28] Finished testing.pyx --- pandas/_libs/testing.pyx | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 7ad5ea189763c..f848310d961e1 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -108,7 +108,7 @@ cpdef assert_almost_equal(a, b, return assert_dict_equal(a, b) if isinstance(a, str) or isinstance(b, str): - assert a == b, "%r != %r" % (a, b) + assert a == b, f"{a} != {b}" return True a_is_ndarray = isinstance(a, np.ndarray) @@ -128,16 +128,15 @@ cpdef assert_almost_equal(a, b, assert_class_equal(a, b, obj=obj) assert has_length(a) and has_length(b), ( - "Can't compare objects without length, one or both is invalid: " - "(%r, %r)" % (a, b)) + f"Can't compare objects without length, one or both is invalid: " + f"({a}, {b})") if a_is_ndarray and b_is_ndarray: na, nb = a.size, b.size if a.shape != b.shape: from pandas.util.testing import raise_assert_detail raise_assert_detail( - obj, '{0} shapes are different'.format(obj), - a.shape, b.shape) + obj, f'{obj} shapes are different', a.shape, b.shape) if check_dtype and not is_dtype_equal(a.dtype, b.dtype): from pandas.util.testing import assert_attr_equal @@ -158,8 +157,7 @@ cpdef assert_almost_equal(a, b, else: r = None - raise_assert_detail(obj, '{0} length are different'.format(obj), - na, nb, r) + raise_assert_detail(obj, f'{obj} length are different', na, nb, r) for i in xrange(len(a)): try: @@ -171,8 +169,8 @@ cpdef assert_almost_equal(a, b, if is_unequal: from pandas.util.testing import raise_assert_detail - msg = '{0} values are different ({1} %)'.format( - obj, np.round(diff * 100.0 / na, 5)) + msg = (f'{obj} values are different ' + f'({np.round(diff * 100.0 / na, 5)} %)') raise_assert_detail(obj, msg, lobj, robj) return True @@ -214,4 +212,4 @@ cpdef assert_almost_equal(a, b, 'with decimal %d' % (fb, fa, decimal)) return True - raise AssertionError("{0} != {1}".format(a, b)) + raise AssertionError(f"{a} != {b}") From 5ff5e400ffe3f31ed12d29fd42658c6ae1aece29 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:33:08 +0200 Subject: [PATCH 12/28] Finished tslib.pyx --- pandas/_libs/tslib.pyx | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git 
a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0f1657480e4b3..d101a2976cd55 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -357,8 +357,8 @@ def array_with_unit_to_datetime(ndarray values, object unit, if ((fvalues < Timestamp.min.value).any() or (fvalues > Timestamp.max.value).any()): - raise OutOfBoundsDatetime("cannot convert input with unit " - "'{unit}'".format(unit=unit)) + raise OutOfBoundsDatetime(f"cannot convert input with unit " + f"'{unit}'") result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') iresult[mask] = NPY_NAT @@ -384,8 +384,8 @@ def array_with_unit_to_datetime(ndarray values, object unit, except OverflowError: if is_raise: raise OutOfBoundsDatetime( - "cannot convert input {val} with the unit " - "'{unit}'".format(val=val, unit=unit)) + f"cannot convert input {val} with the unit " + f"'{unit}'") elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -400,16 +400,16 @@ def array_with_unit_to_datetime(ndarray values, object unit, except ValueError: if is_raise: raise ValueError( - "non convertible value {val} with the unit " - "'{unit}'".format(val=val, unit=unit)) + f"non convertible value {val} with the unit " + f"'{unit}'") elif is_ignore: raise AssertionError iresult[i] = NPY_NAT except OverflowError: if is_raise: raise OutOfBoundsDatetime( - "cannot convert input {val} with the unit " - "'{unit}'".format(val=val, unit=unit)) + f"cannot convert input {val} with the unit " + f"'{unit}'") elif is_ignore: raise AssertionError iresult[i] = NPY_NAT @@ -417,8 +417,8 @@ def array_with_unit_to_datetime(ndarray values, object unit, else: if is_raise: - raise ValueError("unit='{0}' not valid with non-numerical " - "val='{1}'".format(unit, val)) + raise ValueError(f"unit='{unit}' not valid with non-numerical " + f"val='{val}'") if is_ignore: raise AssertionError @@ -600,9 +600,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', iresult[i] = NPY_NAT continue elif is_raise: - raise ValueError("time data {val} doesn't " - "match format specified" - .format(val=val)) + raise ValueError(f"time data {val} doesn't " + f"match format specified") return values, tz_out try: @@ -657,8 +656,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', if is_coerce: iresult[i] = NPY_NAT else: - raise TypeError("{typ} is not convertible to datetime" - .format(typ=type(val))) + raise TypeError(f"{type(val)} is not convertible to datetime") except OutOfBoundsDatetime: if is_coerce: From f6db1950820d5814d7a7cfb1b1f7cb236ec1bcc7 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 11:37:17 +0200 Subject: [PATCH 13/28] Finished window.pyx --- pandas/_libs/window.pyx | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index d50cb05b0174a..86b06397123b7 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -69,8 +69,8 @@ def _check_minp(win, minp, N, floor=None) -> int: if not util.is_integer_object(minp): raise ValueError("min_periods must be an integer") if minp > win: - raise ValueError("min_periods (minp) must be <= " - "window (win)".format(minp=minp, win=win)) + raise ValueError(f"min_periods (minp) must be <= " + f"window (win)") elif minp > N: minp = N + 1 elif minp < 0: @@ -1476,13 +1476,12 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, int ret = 0 if quantile <= 0.0 or quantile >= 1.0: - raise ValueError("quantile value {0} not in [0, 1]".format(quantile)) + raise 
ValueError(f"quantile value {quantile} not in [0, 1]") try: interpolation_type = interpolation_types[interpolation] except KeyError: - raise ValueError("Interpolation '{interp}' is not supported" - .format(interp=interpolation)) + raise ValueError(f"Interpolation '{interpolation}' is not supported") # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs @@ -2077,8 +2076,8 @@ def ewmcov(float64_t[:] input_x, float64_t[:] input_y, bint is_observation if len(input_y) != N: - raise ValueError("arrays are of different lengths " - "({N} and {len_y})".format(N=N, len_y=len(input_y))) + raise ValueError(f"arrays are of different lengths " + f"({N} and {len(input_y)})") output = np.empty(N, dtype=float) if N == 0: From 5fbd79557cc9213e581cd1e2361a178bdba5082f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 16:51:14 +0200 Subject: [PATCH 14/28] Finished fixing errors in previous PR --- pandas/_libs/hashing.pyx | 2 +- pandas/_libs/interval.pyx | 2 +- pandas/_libs/lib.pyx | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 911036a039adb..d3b5ecfdaa178 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -48,7 +48,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): kb = k if len(k) != 16: raise ValueError(f"key should be a 16-byte string encoded, " - f"got {k} (len {len(k)})") + f"got {k} (len {len(k)})") n = len(arr) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 814af744f001e..d527a83745bb7 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -526,7 +526,7 @@ def intervals_to_interval_bounds(ndarray intervals, if not isinstance(interval, Interval): raise TypeError(f"type {type(interval)} with value " - f"{interval} is not an interval") + f"{interval} is not an interval") left[i] = interval.left right[i] = interval.right diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 93cd373e0d0b1..7d65cb52bce1e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1497,7 +1497,7 @@ cdef class Validator: cdef bint is_value_typed(self, object value) except -1: raise NotImplementedError(f'{type(self).__name__} child class ' - f'must define is_value_typed') + f'must define is_value_typed') cdef bint is_valid_null(self, object value) except -1: return value is None or util.is_nan(value) @@ -1634,7 +1634,7 @@ cdef class TemporalValidator(Validator): cdef bint is_valid_null(self, object value) except -1: raise NotImplementedError(f'{type(self).__name__} child class ' - f'must define is_valid_null') + f'must define is_valid_null') cdef inline bint is_valid_skipna(self, object value) except -1: cdef: From 3534311a53c6d8d1134e9629cc53e2b11c55856e Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 10 Nov 2019 23:01:07 +0200 Subject: [PATCH 15/28] added the \!r to the strings --- pandas/_libs/interval.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index d527a83745bb7..6bd8693dffebd 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -379,7 +379,7 @@ cdef class Interval(IntervalMixin): left, right = self._repr_base() name = type(self).__name__ - repr_str = f'{name}({left}, {right}, closed={self.closed})' + repr_str = f'{name}({left!r}, {right!r}, closed={self.closed!r})' return repr_str def __str__(self) -> str: From 
56d40ace54021e5e70819e7553c3e2fb816dc921 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Mon, 11 Nov 2019 22:37:05 +0200 Subject: [PATCH 16/28] parsers.pyx --- pandas/_libs/parsers.pyx | 77 +++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 45 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 601b81556be0e..5956cacda2f8b 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -589,8 +589,7 @@ cdef class TextReader: if not isinstance(quote_char, (str, bytes)) and quote_char is not None: dtype = type(quote_char).__name__ - raise TypeError('"quotechar" must be string, ' - 'not {dtype}'.format(dtype=dtype)) + raise TypeError(f'"quotechar" must be string, not {dtype}') if quote_char is None or quote_char == '': if quoting != QUOTE_NONE: @@ -684,9 +683,9 @@ cdef class TextReader: if ptr == NULL: if not os.path.exists(source): raise FileNotFoundError( - ENOENT, - 'File {source} does not exist'.format(source=source), - source) + ENOENT, + f'File {source} does not exist', + source) raise IOError('Initializing from file failed') self.parser.source = ptr @@ -741,8 +740,7 @@ cdef class TextReader: self.parser.lines < hr): msg = self.orig_header if isinstance(msg, list): - msg = "[%s], len of %d," % ( - ','.join(str(m) for m in msg), len(msg)) + msg = f"[{','.join(str(m) for m in msg)}], len of {len(msg)}," raise ParserError( f'Passed header={msg} but only ' f'{self.parser.lines} lines in file') @@ -768,10 +766,9 @@ cdef class TextReader: if name == '': if self.has_mi_columns: - name = ('Unnamed: {i}_level_{lvl}' - .format(i=i, lvl=level)) + name = (f'Unnamed: {i}_level_{level}') else: - name = 'Unnamed: {i}'.format(i=i) + name = f'Unnamed: {i}' unnamed_count += 1 count = counts.get(name, 0) @@ -845,10 +842,9 @@ cdef class TextReader: passed_count = len(header[0]) - # if passed_count > field_count: - # raise ParserError('Column names have %d fields, ' - # 'data has %d fields' - # % (passed_count, field_count)) + if passed_count > field_count: + raise ParserError(f'Column names have {passed_count} fields, ' + f'data has {field_count} fields') if (self.has_usecols and self.allow_leading_cols and not callable(self.usecols)): @@ -990,7 +986,7 @@ cdef class TextReader: cdef _end_clock(self, what): if self.verbose: elapsed = time.time() - self.clocks.pop(-1) - print('%s took: %.2f ms' % (what, elapsed * 1000)) + print(f'{what} took: {elapsed * 1000:.2f} ms') def set_noconvert(self, i): self.noconvert.add(i) @@ -1028,11 +1024,9 @@ cdef class TextReader: (num_cols >= self.parser.line_fields[i]) * num_cols if self.table_width - self.leading_cols > num_cols: - raise ParserError( - "Too many columns specified: expected {expected} and " - "found {found}" - .format(expected=self.table_width - self.leading_cols, - found=num_cols)) + raise ParserError(f"Too many columns specified: expected " + f"{self.table_width - self.leading_cols} " + f"and found {num_cols}") results = {} nused = 0 @@ -1075,9 +1069,9 @@ cdef class TextReader: if conv: if col_dtype is not None: - warnings.warn(("Both a converter and dtype were specified " - "for column {0} - only the converter will " - "be used").format(name), ParserWarning, + warnings.warn((f"Both a converter and dtype were specified " + f"for column {name} - only the converter will " + f"be used"), ParserWarning, stacklevel=5) results[i] = _apply_converter(conv, self.parser, i, start, end, self.c_encoding) @@ -1118,7 +1112,7 @@ cdef class TextReader: col_res = _maybe_upcast(col_res) if col_res is 
None: - raise ParserError('Unable to parse column {i}'.format(i=i)) + raise ParserError(f'Unable to parse column {i}') results[i] = col_res @@ -1178,12 +1172,9 @@ cdef class TextReader: col_res = col_res.astype(col_dtype) if (col_res != col_res_orig).any(): raise ValueError( - "cannot safely convert passed user dtype of " - "{col_dtype} for {col_res} dtyped data in " - "column {column}".format( - col_dtype=col_dtype, - col_res=col_res_orig.dtype.name, - column=i)) + f"cannot safely convert passed user dtype of " + f"{col_dtype} for {col_res_orig.dtype.name} dtyped data in " + f"column {i}") return col_res, na_count @@ -1216,9 +1207,9 @@ cdef class TextReader: dtype=dtype) except NotImplementedError: raise NotImplementedError( - "Extension Array: {ea} must implement " - "_from_sequence_of_strings in order " - "to be used in parser methods".format(ea=array_type)) + f"Extension Array: {array_type} must implement " + f"_from_sequence_of_strings in order " + f"to be used in parser methods") return result, na_count @@ -1228,8 +1219,7 @@ cdef class TextReader: end, na_filter, na_hashset) if user_dtype and na_count is not None: if na_count > 0: - raise ValueError("Integer column has NA values in " - "column {column}".format(column=i)) + raise ValueError(f"Integer column has NA values in column {i}") except OverflowError: result = _try_uint64(self.parser, i, start, end, na_filter, na_hashset) @@ -1253,8 +1243,7 @@ cdef class TextReader: self.true_set, self.false_set) if user_dtype and na_count is not None: if na_count > 0: - raise ValueError("Bool column has NA values in " - "column {column}".format(column=i)) + raise ValueError(f"Bool column has NA values in column {i}") return result, na_count elif dtype.kind == 'S': @@ -1270,8 +1259,7 @@ cdef class TextReader: elif dtype.kind == 'U': width = dtype.itemsize if width > 0: - raise TypeError("the dtype {dtype} is not " - "supported for parsing".format(dtype=dtype)) + raise TypeError(f"the dtype {dtype} is not supported for parsing") # unicode variable width return self._string_convert(i, start, end, na_filter, @@ -1280,12 +1268,11 @@ cdef class TextReader: return self._string_convert(i, start, end, na_filter, na_hashset) elif is_datetime64_dtype(dtype): - raise TypeError("the dtype {dtype} is not supported " - "for parsing, pass this column " - "using parse_dates instead".format(dtype=dtype)) + raise TypeError(f"the dtype {dtype} is not supported " + f"for parsing, pass this column " + f"using parse_dates instead") else: - raise TypeError("the dtype {dtype} is not " - "supported for parsing".format(dtype=dtype)) + raise TypeError(f"the dtype {dtype} is not supported for parsing") cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end, bint na_filter, kh_str_starts_t *na_hashset): @@ -2132,7 +2119,7 @@ cdef raise_parser_error(object base, parser_t *parser): Py_XDECREF(type) raise old_exc - message = '{base}. C error: '.format(base=base) + message = f'{base}. 
C error: ' if parser.error_msg != NULL: message += parser.error_msg.decode('utf-8') else: From aef897187762ca233dc10edcaec4d948caaf877a Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Mon, 11 Nov 2019 23:11:43 +0200 Subject: [PATCH 17/28] Finished the pyx left overs --- pandas/_libs/testing.pyx | 8 ++++---- pandas/_libs/tslib.pyx | 7 +------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index f848310d961e1..141735a97938a 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -204,12 +204,12 @@ cpdef assert_almost_equal(a, b, # case for zero if abs(fa) < 1e-5: if not decimal_almost_equal(fa, fb, decimal): - assert False, ('(very low values) expected %.5f but ' - 'got %.5f, with decimal %d' % (fb, fa, decimal)) + assert False, (f'(very low values) expected {fb:.5f} ' + f'but got {fa:.5f}, with decimal {decimal}') else: if not decimal_almost_equal(1, fb / fa, decimal): - assert False, ('expected %.5f but got %.5f, ' - 'with decimal %d' % (fb, fa, decimal)) + assert False, (f'expected {fb:.5f} but got {fa:.5f}, ' + f'with decimal {decimal}') return True raise AssertionError(f"{a} != {b}") diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d101a2976cd55..812b433bcdaf3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -266,12 +266,7 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, elif basic_format: dt64_to_dtstruct(val, &dts) - res = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, - dts.month, - dts.day, - dts.hour, - dts.min, - dts.sec) + res = f'{dts:year-month.2d day.2d:hour.2d:min.2d:sec.2d}' if show_ns: ns = dts.ps // 1000 From 381139511c0112c7a9aff8c924d558d5ef64fe30 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 11 Nov 2019 23:49:21 +0200 Subject: [PATCH 18/28] Update parsers.pyx --- pandas/_libs/parsers.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 5956cacda2f8b..8b308ea69202e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -683,9 +683,9 @@ cdef class TextReader: if ptr == NULL: if not os.path.exists(source): raise FileNotFoundError( - ENOENT, - f'File {source} does not exist', - source) + ENOENT, + f'File {source} does not exist', + source) raise IOError('Initializing from file failed') self.parser.source = ptr From 515e20efaf6318e32999073adc7e8f93e6f5d8c7 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 11 Nov 2019 23:50:52 +0200 Subject: [PATCH 19/28] Update parsers.pyx --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 8b308ea69202e..281ec343cb616 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -766,7 +766,7 @@ cdef class TextReader: if name == '': if self.has_mi_columns: - name = (f'Unnamed: {i}_level_{level}') + name = f'Unnamed: {i}_level_{level}' else: name = f'Unnamed: {i}' unnamed_count += 1 From 10dea5a6eb3d11fc9dc8cd46bbe1654e8793babf Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 12 Nov 2019 00:03:19 +0200 Subject: [PATCH 20/28] Putted the join out of the f-string --- pandas/_libs/parsers.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 281ec343cb616..340c11905a6fb 100644 --- 
a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -740,7 +740,8 @@ cdef class TextReader: self.parser.lines < hr): msg = self.orig_header if isinstance(msg, list): - msg = f"[{','.join(str(m) for m in msg)}], len of {len(msg)}," + joined = ','.join(str(m) for m in msg) + msg = f"[{joined}], len of {len(msg)}," raise ParserError( f'Passed header={msg} but only ' f'{self.parser.lines} lines in file') From dd53c05f74bba9b160b419973ca819f14c029812 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 12 Nov 2019 00:32:00 +0200 Subject: [PATCH 21/28] res date format --- pandas/_libs/tslib.pyx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 812b433bcdaf3..0d0796d520669 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -266,15 +266,16 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, elif basic_format: dt64_to_dtstruct(val, &dts) - res = f'{dts:year-month.2d day.2d:hour.2d:min.2d:sec.2d}' + res = (f'{dts.year}-{dts.month:.2d}-{dts.day:.2d} ' + f'{dts.hour:.2d}-{dts.min:.2d}-{dts.sec:.2d}') if show_ns: ns = dts.ps // 1000 - res += '.%.9d' % (ns + 1000 * dts.us) + res += f'{ns + 1000 * dts.us:.9d}' elif show_us: - res += '.%.6d' % dts.us + res += f'{dts.us:.6d}' elif show_ms: - res += '.%.3d' % (dts.us /1000) + res += f'{dts.us /1000:.3d}' result[i] = res From 10ed985e15ef82d3a18202e0561a7c4a4e642c0a Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 12 Nov 2019 00:42:21 +0200 Subject: [PATCH 22/28] Reverted the comment in parsers.pyx --- pandas/_libs/parsers.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 340c11905a6fb..a60d83f32021b 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -843,9 +843,10 @@ cdef class TextReader: passed_count = len(header[0]) - if passed_count > field_count: - raise ParserError(f'Column names have {passed_count} fields, ' - f'data has {field_count} fields') + # if passed_count > field_count: + # raise ParserError('Column names have %d fields, ' + # 'data has %d fields' + # % (passed_count, field_count)) if (self.has_usecols and self.allow_leading_cols and not callable(self.usecols)): From 1375f9f243362130cb6fc24781e77eb6bb560974 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 12 Nov 2019 01:14:55 +0200 Subject: [PATCH 23/28] Trying with no percision on intergenrs --- pandas/_libs/tslib.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0d0796d520669..7be5b6048b852 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -266,16 +266,16 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, elif basic_format: dt64_to_dtstruct(val, &dts) - res = (f'{dts.year}-{dts.month:.2d}-{dts.day:.2d} ' - f'{dts.hour:.2d}-{dts.min:.2d}-{dts.sec:.2d}') + res = (f'{dts.year}-{dts.month}-{dts.day} ' + f'{dts.hour}-{dts.min}-{dts.sec}') if show_ns: ns = dts.ps // 1000 - res += f'{ns + 1000 * dts.us:.9d}' + res += f'{ns + 1000 * dts.us}' elif show_us: - res += f'{dts.us:.6d}' + res += f'{dts.us}' elif show_ms: - res += f'{dts.us /1000:.3d}' + res += f'{dts.us /1000}' result[i] = res From eec10f0a40e1e215c0eeb2ce763aa55edab233de Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Tue, 12 Nov 2019 02:39:24 +0200 Subject: [PATCH 24/28] Update tslib.pyx --- 
pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7be5b6048b852..ab190dce6301a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -267,7 +267,7 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, dt64_to_dtstruct(val, &dts) res = (f'{dts.year}-{dts.month}-{dts.day} ' - f'{dts.hour}-{dts.min}-{dts.sec}') + f'{dts.hour}-{dts.min}-{dts.sec}') if show_ns: ns = dts.ps // 1000 From 682006367cc018dec046cf607635a144bf1d24eb Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Tue, 12 Nov 2019 02:43:19 +0200 Subject: [PATCH 25/28] Update parsers.pyx --- pandas/_libs/parsers.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a60d83f32021b..8eb7cfb9a87c5 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -683,9 +683,9 @@ cdef class TextReader: if ptr == NULL: if not os.path.exists(source): raise FileNotFoundError( - ENOENT, - f'File {source} does not exist', - source) + ENOENT, + f'File {source} does not exist', + source) raise IOError('Initializing from file failed') self.parser.source = ptr From 98d440d273c47829a948393db2f8ee0510a00036 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 12 Nov 2019 09:32:23 +0200 Subject: [PATCH 26/28] Squshing commits --- pandas/_libs/hashtable_class_helper.pxi.in | 10 ++++---- pandas/_libs/internals.pyx | 2 +- pandas/_libs/parsers.pyx | 6 ++--- pandas/_libs/tslib.pyx | 17 +++++++++--- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/integer.py | 2 +- pandas/core/arrays/interval.py | 2 +- pandas/core/computation/expr.py | 2 +- pandas/core/dtypes/common.py | 1 + pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 6 +++-- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimes.py | 4 +-- pandas/core/indexes/interval.py | 6 ++--- pandas/core/indexes/multi.py | 6 ++--- pandas/core/indexes/numeric.py | 8 +++--- pandas/core/indexes/period.py | 4 +-- pandas/core/indexes/range.py | 2 +- pandas/core/indexes/timedeltas.py | 4 +-- pandas/core/internals/blocks.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/core/series.py | 2 +- pandas/io/pytables.py | 2 +- pandas/tests/dtypes/test_common.py | 30 ++++++++++++++++++++++ pandas/tests/reshape/test_concat.py | 2 +- scripts/validate_docstrings.py | 2 +- 29 files changed, 89 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index c39d6d60d4ea5..b207fcb66948d 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -100,7 +100,7 @@ cdef class {{name}}Vector: PyMem_Free(self.data) self.data = NULL - def __len__(self): + def __len__(self) -> int: return self.data.n cpdef to_array(self): @@ -168,7 +168,7 @@ cdef class StringVector: PyMem_Free(self.data) self.data = NULL - def __len__(self): + def __len__(self) -> int: return self.data.n def to_array(self): @@ -212,7 +212,7 @@ cdef class ObjectVector: self.ao = np.empty(_INIT_VEC_CAP, dtype=object) self.data = self.ao.data - def __len__(self): + def __len__(self) -> int: return self.n cdef inline append(self, object obj): @@ -270,7 +270,7 @@ cdef class {{name}}HashTable(HashTable): size_hint = min(size_hint, 
_SIZE_HINT_LIMIT) kh_resize_{{dtype}}(self.table, size_hint) - def __len__(self): + def __len__(self) -> int: return self.table.size def __dealloc__(self): @@ -897,7 +897,7 @@ cdef class PyObjectHashTable(HashTable): kh_destroy_pymap(self.table) self.table = NULL - def __len__(self): + def __len__(self) -> int: return self.table.size def __contains__(self, object key): diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index ecd090de500da..08decb44a8a53 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -66,7 +66,7 @@ cdef class BlockPlacement: def __repr__(self) -> str: return str(self) - def __len__(self): + def __len__(self) -> int: cdef: slice s = self._ensure_has_slice() if s is not None: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a60d83f32021b..8eb7cfb9a87c5 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -683,9 +683,9 @@ cdef class TextReader: if ptr == NULL: if not os.path.exists(source): raise FileNotFoundError( - ENOENT, - f'File {source} does not exist', - source) + ENOENT, + f'File {source} does not exist', + source) raise IOError('Initializing from file failed') self.parser.source = ptr diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7be5b6048b852..220ab1ac931be 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -266,16 +266,25 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, elif basic_format: dt64_to_dtstruct(val, &dts) +<<<<<<< HEAD + res = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, + dts.month, + dts.day, + dts.hour, + dts.min, + dts.sec) +======= res = (f'{dts.year}-{dts.month}-{dts.day} ' - f'{dts.hour}-{dts.min}-{dts.sec}') + f'{dts.hour}-{dts.min}-{dts.sec}') +>>>>>>> 682006367cc018dec046cf607635a144bf1d24eb if show_ns: ns = dts.ps // 1000 - res += f'{ns + 1000 * dts.us}' + res += '.%.9d' % (ns + 1000 * dts.us) elif show_us: - res += f'{dts.us}' + res += '.%.6d' % dts.us elif show_ms: - res += f'{dts.us /1000}' + res += '.%.3d' % (dts.us / 1000) result[i] = res diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 39470c7420086..73d1db9bda8ed 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1940,7 +1940,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): take = take_nd - def __len__(self): + def __len__(self) -> int: """ The length of this Categorical. 
""" diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4b83dd0cfff09..f93db4695d38f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -396,7 +396,7 @@ def size(self) -> int: """The number of elements in this array.""" return np.prod(self.shape) - def __len__(self): + def __len__(self) -> int: return len(self._data) def __getitem__(self, key): diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 08b53e54b91ef..41d8bffd8c131 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -469,7 +469,7 @@ def __setitem__(self, key, value): self._data[key] = value self._mask[key] = mask - def __len__(self): + def __len__(self) -> int: return len(self._data) @property diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index cc41797e7872b..cb482665b3534 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -489,7 +489,7 @@ def _validate(self): def __iter__(self): return iter(np.asarray(self)) - def __len__(self): + def __len__(self) -> int: return len(self.left) def __getitem__(self, value): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 39653c3d695b2..929c9e69d56ac 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -837,7 +837,7 @@ def __call__(self): def __repr__(self) -> str: return printing.pprint_thing(self.terms) - def __len__(self): + def __len__(self) -> int: return len(self.expr) def parse(self): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5180f513dfed0..4f9481eccb836 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1508,6 +1508,7 @@ def is_extension_type(arr): Check whether an array-like is of a pandas extension class instance. .. deprecated:: 1.0.0 + Use ``is_extension_array_dtype`` instead. Extension classes include categoricals, pandas sparse objects (i.e. classes represented within the pandas library and not ones external diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7e3c2200dbabc..ebee8b10896be 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1023,7 +1023,7 @@ def itertuples(self, index=True, name="Pandas"): # fallback to regular tuples return zip(*arrays) - def __len__(self): + def __len__(self) -> int: """ Returns length of info axis, but here we use the index. 
""" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 49df374670577..2468c43337d0d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1952,7 +1952,7 @@ def items(self): def iteritems(self): return self.items() - def __len__(self): + def __len__(self) -> int: """Returns length of info axis""" return len(self._info_axis) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e73be29d5b104..fd45d60b02277 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -399,7 +399,7 @@ def __init__( # we accept no other args validate_kwargs("group", kwargs, {}) - def __len__(self): + def __len__(self) -> int: return len(self.groups) def __repr__(self) -> str: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c9697c530628a..ee124ba3851b1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -649,10 +649,12 @@ def _engine(self): # Array-Like Methods # ndarray compat - def __len__(self): + def __len__(self) -> int: """ Return the length of the Index. """ + # Assertion needed for mypy, see GH#29475 + assert self._data is not None return len(self._data) def __array__(self, dtype=None): @@ -1807,7 +1809,7 @@ def inferred_type(self): return lib.infer_dtype(self, skipna=False) @cache_readonly - def is_all_dates(self): + def is_all_dates(self) -> bool: return is_datetime_array(ensure_object(self.values)) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e5a8edb56e413..0187b47ab50a1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -366,7 +366,7 @@ def _format_attrs(self): # -------------------------------------------------------------------- @property - def inferred_type(self): + def inferred_type(self) -> str: return "categorical" @property diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2d0ecf1b936da..4a3ee57084a8a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1235,13 +1235,13 @@ def is_type_compatible(self, typ): return typ == self.inferred_type or typ == "datetime" @property - def inferred_type(self): + def inferred_type(self) -> str: # b/c datetime is represented as microseconds since the epoch, make # sure we can't have ambiguous indexing return "datetime64" @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return True def insert(self, loc, item): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index bc3c0be08ec12..cf5295460d8fc 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -468,7 +468,7 @@ def itemsize(self): warnings.simplefilter("ignore") return self.left.itemsize + self.right.itemsize - def __len__(self): + def __len__(self) -> int: return len(self.left) @cache_readonly @@ -524,7 +524,7 @@ def dtype(self): return self._data.dtype @property - def inferred_type(self): + def inferred_type(self) -> str: """Return a string of the type inferred from the values""" return "interval" @@ -1357,7 +1357,7 @@ def func(self, other, sort=sort): return func @property - def is_all_dates(self): + def is_all_dates(self) -> bool: """ This is False even when left/right contain datetime-like objects, as the check is done on the Interval itself diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 19769d5b029a1..a6a6de6c13c04 100644 --- 
a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1217,7 +1217,7 @@ def format( # -------------------------------------------------------------------- - def __len__(self): + def __len__(self) -> int: return len(self.codes[0]) def _get_names(self): @@ -1322,7 +1322,7 @@ def _constructor(self): return MultiIndex.from_tuples @cache_readonly - def inferred_type(self): + def inferred_type(self) -> str: return "mixed" def _get_level_number(self, level): @@ -1791,7 +1791,7 @@ def to_flat_index(self): return Index(self.values, tupleize_cols=False) @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return False def is_lexsorted(self): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 12a9201b06283..3e2b41f62f30b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -134,7 +134,7 @@ def _concat_same_dtype(self, indexes, name): return result.rename(name) @property - def is_all_dates(self): + def is_all_dates(self) -> bool: """ Checks that all the labels are datetime objects. """ @@ -228,7 +228,7 @@ class Int64Index(IntegerIndex): _default_dtype = np.int64 @property - def inferred_type(self): + def inferred_type(self) -> str: """Always 'integer' for ``Int64Index``""" return "integer" @@ -283,7 +283,7 @@ class UInt64Index(IntegerIndex): _default_dtype = np.uint64 @property - def inferred_type(self): + def inferred_type(self) -> str: """Always 'integer' for ``UInt64Index``""" return "integer" @@ -356,7 +356,7 @@ class Float64Index(NumericIndex): _default_dtype = np.float64 @property - def inferred_type(self): + def inferred_type(self) -> str: """Always 'floating' for ``Float64Index``""" return "floating" diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ca7be9ba512da..3bcb9ba345713 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -574,7 +574,7 @@ def searchsorted(self, value, side="left", sorter=None): return self._ndarray_values.searchsorted(value, side=side, sorter=sorter) @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return True @property @@ -591,7 +591,7 @@ def is_full(self): return ((values[1:] - values[:-1]) < 2).all() @property - def inferred_type(self): + def inferred_type(self) -> str: # b/c data is represented as ints make sure we can't have ambiguous # indexing return "period" diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5fa3431fc97c0..67791417f1bb5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -698,7 +698,7 @@ def _concat_same_dtype(self, indexes, name): # In this case return an empty range index. 
return RangeIndex(0, 0).rename(name) - def __len__(self): + def __len__(self) -> int: """ return the length of the RangeIndex """ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2324b8cf74c46..8114b4a772f28 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -602,11 +602,11 @@ def is_type_compatible(self, typ): return typ == self.inferred_type or typ == "timedelta" @property - def inferred_type(self): + def inferred_type(self) -> str: return "timedelta64" @property - def is_all_dates(self): + def is_all_dates(self) -> bool: return True def insert(self, loc, item): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9402a3ef9a763..5508cf3ca522e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -288,7 +288,7 @@ def __repr__(self) -> str: return result - def __len__(self): + def __len__(self) -> int: return len(self.values) def __getstate__(self): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0e97e55acddad..fbe1db1c23cdb 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -319,7 +319,7 @@ def _post_setstate(self): self._known_consolidated = False self._rebuild_blknos_and_blklocs() - def __len__(self): + def __len__(self) -> int: return len(self.items) def __repr__(self) -> str: diff --git a/pandas/core/series.py b/pandas/core/series.py index 15f405e244d0f..7327c2d543836 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -712,7 +712,7 @@ def put(self, *args, **kwargs): ) self._values.put(*args, **kwargs) - def __len__(self): + def __len__(self) -> int: """ Return the length of the Series. """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e98802888e582..ee08e2abb2289 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -540,7 +540,7 @@ def __contains__(self, key): return True return False - def __len__(self): + def __len__(self) -> int: return len(self.groups()) def __repr__(self) -> str: diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 7abaa0651449e..d8420673104d5 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -549,6 +549,7 @@ def test_is_bool_dtype(): assert com.is_bool_dtype(pd.Index([True, False])) +@pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") @pytest.mark.parametrize( "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] ) @@ -573,6 +574,35 @@ def test_is_extension_type(check_scipy): assert not com.is_extension_type(scipy.sparse.bsr_matrix([1, 2, 3])) +def test_is_extension_type_deprecation(): + with tm.assert_produces_warning(FutureWarning): + com.is_extension_type([1, 2, 3]) + + +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_extension_array_dtype(check_scipy): + assert not com.is_extension_array_dtype([1, 2, 3]) + assert not com.is_extension_array_dtype(np.array([1, 2, 3])) + assert not com.is_extension_array_dtype(pd.DatetimeIndex([1, 2, 3])) + + cat = pd.Categorical([1, 2, 3]) + assert com.is_extension_array_dtype(cat) + assert com.is_extension_array_dtype(pd.Series(cat)) + assert com.is_extension_array_dtype(pd.SparseArray([1, 2, 3])) + assert com.is_extension_array_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + 
assert com.is_extension_array_dtype(s) + + if check_scipy: + import scipy.sparse + + assert not com.is_extension_array_dtype(scipy.sparse.bsr_matrix([1, 2, 3])) + + def test_is_complex_dtype(): assert not com.is_complex_dtype(int) assert not com.is_complex_dtype(str) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 5c930e01c735d..b537200dd7664 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1879,7 +1879,7 @@ def test_concat_iterables(self): tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected) class CustomIterator1: - def __len__(self): + def __len__(self) -> int: return 2 def __getitem__(self, index): diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 1d0f4b583bd0c..7c6f2fea97933 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -250,7 +250,7 @@ def __init__(self, name): self.clean_doc = pydoc.getdoc(obj) self.doc = NumpyDocString(self.clean_doc) - def __len__(self): + def __len__(self) -> int: return len(self.raw_doc) @staticmethod From 28091a8f76e4177a9fd563e7897c2f3ae7fd5150 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 12 Nov 2019 09:58:04 +0200 Subject: [PATCH 27/28] Lint errors --- pandas/_libs/parsers.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 8eb7cfb9a87c5..ab2b026e40396 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -683,9 +683,9 @@ cdef class TextReader: if ptr == NULL: if not os.path.exists(source): raise FileNotFoundError( - ENOENT, - f'File {source} does not exist', - source) + ENOENT, + f'File {source} does not exist', + source) raise IOError('Initializing from file failed') self.parser.source = ptr From 082c5ed9b9c40570e485a73dfe9f7913b7ad0b6b Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 12 Nov 2019 21:55:51 +0200 Subject: [PATCH 28/28] Finnal commit, number one --- pandas/_libs/parsers.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index ab2b026e40396..a2c7d0da5b4a8 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -844,9 +844,9 @@ cdef class TextReader: passed_count = len(header[0]) # if passed_count > field_count: - # raise ParserError('Column names have %d fields, ' - # 'data has %d fields' - # % (passed_count, field_count)) + # raise ParserError('Column names have %d fields, ' + # 'data has %d fields' + # % (passed_count, field_count)) if (self.has_usecols and self.allow_leading_cols and not callable(self.usecols)):
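
Note (editorial, not part of the patch series): every hunk above applies the same mechanical conversion, replacing %-interpolation and str.format() calls with f-strings while leaving the message text itself unchanged. A minimal sketch of that pattern, modeled on the vec_compare change in pandas/_libs/ops.pyx; the wrapper function name here is hypothetical and only used for illustration:

    def _check_lengths(left, right):
        """Raise if two sequences differ in length, illustrative only."""
        if len(left) != len(right):
            # before: 'Arrays were different lengths: {n} vs {nright}'
            #             .format(n=len(left), nright=len(right))
            # after: the expressions are evaluated directly inside the literal
            raise ValueError(
                f'Arrays were different lengths: {len(left)} vs {len(right)}')

When a message is split across implicitly concatenated string literals, only the pieces that contain replacement fields need the f prefix; a prefix on a placeholder-free piece is harmless but unnecessary.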