From a9793f591c7a63a834732bbf0993dab83d5b4061 Mon Sep 17 00:00:00 2001 From: William Blum Date: Tue, 1 Nov 2022 10:56:43 -0400 Subject: [PATCH 01/12] STYLE: Refactored one loop, suppressed three false positives. --- pandas/core/strings/object_array.py | 1 + pandas/io/pytables.py | 3 ++- pandas/tests/io/parser/test_c_parser_only.py | 10 +++++----- pyproject.toml | 1 - 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index d45cfa8de6f3e..afbc7efb208c3 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -382,6 +382,7 @@ def _str_get_dummies(self, sep: str = "|"): for i, t in enumerate(tags2): pat = sep + t + sep + # pylint: disable-next=cell-var-from-loop dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) return dummies, tags2 diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 483c385fa32fc..a5ed1330323c8 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4129,7 +4129,7 @@ def process_filter(field, filt): # the levels if self.is_multi_index: filt = filt.union(Index(self.levels)) - + # pylint: disable-next=cell-var-from-loop takers = op(axis_values, filt) return obj.loc(axis=axis_number)[takers] @@ -4143,6 +4143,7 @@ def process_filter(field, filt): # hack until we support reversed dim flags if isinstance(obj, DataFrame): axis_number = 1 - axis_number + # pylint: disable-next=cell-var-from-loop takers = op(values, filt) return obj.loc(axis=axis_number)[takers] diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index ecc49ea8adb9f..261da6ecf468f 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -176,6 +176,9 @@ def test_precise_conversion(c_parser_only): normal_errors = [] precise_errors = [] + def error(val, actual_val): + return abs(Decimal(f"{val:.100}") - actual_val) + # test numbers between 1 and 2 for num in np.linspace(1.0, 2.0, num=500): # 25 decimal digits of precision @@ -192,11 +195,8 @@ def test_precise_conversion(c_parser_only): ) actual_val = Decimal(text[2:]) - def error(val): - return abs(Decimal(f"{val:.100}") - actual_val) - - normal_errors.append(error(normal_val)) - precise_errors.append(error(precise_val)) + normal_errors.append(error(normal_val, actual_val)) + precise_errors.append(error(precise_val, actual_val)) # round-trip should match float() assert roundtrip_val == float(text[2:]) diff --git a/pyproject.toml b/pyproject.toml index ddecebaec7c72..b196aa2afc1cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,7 +143,6 @@ disable = [ "arguments-renamed", "attribute-defined-outside-init", "broad-except", - "cell-var-from-loop", "comparison-with-callable", "confusing-with-statement", "dangerous-default-value", From 724f3c71ee4c871eb119510e6cb29a2a2108a6a9 Mon Sep 17 00:00:00 2001 From: William Blum Date: Tue, 1 Nov 2022 13:35:25 -0400 Subject: [PATCH 02/12] Oooh, didn't think of that. --- pandas/core/strings/object_array.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index afbc7efb208c3..3d0220f953026 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -380,10 +380,14 @@ def _str_get_dummies(self, sep: str = "|"): dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) + def _isin(test_elements, element): + return element in test_element + for i, t in enumerate(tags2): pat = sep + t + sep - # pylint: disable-next=cell-var-from-loop - dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) + dummies[:, i] = lib.map_infer( + arr.to_numpy(), functools.partial(_isin, element=pat) + ) return dummies, tags2 def _str_upper(self): From 98b699119e44088dde8a0aa3826ca32987163903 Mon Sep 17 00:00:00 2001 From: William Blum Date: Tue, 1 Nov 2022 13:43:17 -0400 Subject: [PATCH 03/12] I need to get better at spotting this stuff. --- pandas/io/pytables.py | 86 +++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 27 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ddfb43adda262..1a399510b23f5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4106,45 +4106,77 @@ def process_axes(self, obj, selection: Selection, columns=None) -> DataFrame: for axis, labels in self.non_index_axes: obj = _reindex_axis(obj, axis, labels, columns) + def process_filter(field, filt, op): + + for axis_name in obj._AXIS_ORDERS: + axis_number = obj._get_axis_number(axis_name) + axis_values = obj._get_axis(axis_name) + assert axis_number is not None + + # see if the field is the name of an axis + if field == axis_name: + + # if we have a multi-index, then need to include + # the levels + if self.is_multi_index: + filt = filt.union(Index(self.levels)) + takers = op(axis_values, filt) + return obj.loc(axis=axis_number)[takers] + + # this might be the name of a file IN an axis + elif field in axis_values: + + # we need to filter on this dimension + values = ensure_index(getattr(obj, field).values) + filt = ensure_index(filt) + + # hack until we support reversed dim flags + if isinstance(obj, DataFrame): + axis_number = 1 - axis_number + takers = op(values, filt) + return obj.loc(axis=axis_number)[takers] + + raise ValueError(f"cannot find the field [{field}] for filtering!") + # apply the selection filters (but keep in the same order) if selection.filter is not None: for field, op, filt in selection.filter.format(): - def process_filter(field, filt): + # def process_filter(field, filt): - for axis_name in obj._AXIS_ORDERS: - axis_number = obj._get_axis_number(axis_name) - axis_values = obj._get_axis(axis_name) - assert axis_number is not None + # for axis_name in obj._AXIS_ORDERS: + # axis_number = obj._get_axis_number(axis_name) + # axis_values = obj._get_axis(axis_name) + # assert axis_number is not None - # see if the field is the name of an axis - if field == axis_name: + # # see if the field is the name of an axis + # if field == axis_name: - # if we have a multi-index, then need to include - # the levels - if self.is_multi_index: - filt = filt.union(Index(self.levels)) - # pylint: disable-next=cell-var-from-loop - takers = op(axis_values, filt) - return obj.loc(axis=axis_number)[takers] + # # if we have a multi-index, then need to include + # # the levels + # if self.is_multi_index: + # filt = filt.union(Index(self.levels)) + # # pylint: disable-next=cell-var-from-loop + # takers = op(axis_values, filt) + # return obj.loc(axis=axis_number)[takers] - # this might be the name of a file IN an axis - elif field in axis_values: + # # this might be the name of a file IN an axis + # elif field in axis_values: - # we need to filter on this dimension - values = ensure_index(getattr(obj, field).values) - filt = ensure_index(filt) + # # we need to filter on this dimension + # values = ensure_index(getattr(obj, field).values) + # filt = ensure_index(filt) - # hack until we support reversed dim flags - if isinstance(obj, DataFrame): - axis_number = 1 - axis_number - # pylint: disable-next=cell-var-from-loop - takers = op(values, filt) - return obj.loc(axis=axis_number)[takers] + # # hack until we support reversed dim flags + # if isinstance(obj, DataFrame): + # axis_number = 1 - axis_number + # # pylint: disable-next=cell-var-from-loop + # takers = op(values, filt) + # return obj.loc(axis=axis_number)[takers] - raise ValueError(f"cannot find the field [{field}] for filtering!") + # raise ValueError(f"cannot find the field [{field}] for filtering!") - obj = process_filter(field, filt) + obj = process_filter(field, filt, op) return obj From d4b22e573989896e912258e986b6146e23540e6e Mon Sep 17 00:00:00 2001 From: William Blum Date: Tue, 1 Nov 2022 14:05:16 -0400 Subject: [PATCH 04/12] Yeah, let's not suppress those after all. --- pandas/core/strings/object_array.py | 3 ++- pandas/io/pytables.py | 35 ----------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 3d0220f953026..ce2ebf568d7d3 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable # noqa: PDF001 +import functools import re import textwrap from typing import ( @@ -381,7 +382,7 @@ def _str_get_dummies(self, sep: str = "|"): dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) def _isin(test_elements, element): - return element in test_element + return element in test_elements for i, t in enumerate(tags2): pat = sep + t + sep diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1a399510b23f5..348f595b1242c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4141,41 +4141,6 @@ def process_filter(field, filt, op): # apply the selection filters (but keep in the same order) if selection.filter is not None: for field, op, filt in selection.filter.format(): - - # def process_filter(field, filt): - - # for axis_name in obj._AXIS_ORDERS: - # axis_number = obj._get_axis_number(axis_name) - # axis_values = obj._get_axis(axis_name) - # assert axis_number is not None - - # # see if the field is the name of an axis - # if field == axis_name: - - # # if we have a multi-index, then need to include - # # the levels - # if self.is_multi_index: - # filt = filt.union(Index(self.levels)) - # # pylint: disable-next=cell-var-from-loop - # takers = op(axis_values, filt) - # return obj.loc(axis=axis_number)[takers] - - # # this might be the name of a file IN an axis - # elif field in axis_values: - - # # we need to filter on this dimension - # values = ensure_index(getattr(obj, field).values) - # filt = ensure_index(filt) - - # # hack until we support reversed dim flags - # if isinstance(obj, DataFrame): - # axis_number = 1 - axis_number - # # pylint: disable-next=cell-var-from-loop - # takers = op(values, filt) - # return obj.loc(axis=axis_number)[takers] - - # raise ValueError(f"cannot find the field [{field}] for filtering!") - obj = process_filter(field, filt, op) return obj From 7572c873f6acaeb514d7761cd2a5699726fdd66f Mon Sep 17 00:00:00 2001 From: Bill Blum Date: Tue, 1 Nov 2022 17:42:58 -0400 Subject: [PATCH 05/12] Accidentally removed a newline. --- pandas/io/pytables.py | 58 ++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 348f595b1242c..1f2bb4c5d21b4 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4106,42 +4106,44 @@ def process_axes(self, obj, selection: Selection, columns=None) -> DataFrame: for axis, labels in self.non_index_axes: obj = _reindex_axis(obj, axis, labels, columns) - def process_filter(field, filt, op): + # apply the selection filters (but keep in the same order) + if selection.filter is not None: + for field, op, filt in selection.filter.format(): - for axis_name in obj._AXIS_ORDERS: - axis_number = obj._get_axis_number(axis_name) - axis_values = obj._get_axis(axis_name) - assert axis_number is not None + def process_filter(field, filt): - # see if the field is the name of an axis - if field == axis_name: + for axis_name in obj._AXIS_ORDERS: + axis_number = obj._get_axis_number(axis_name) + axis_values = obj._get_axis(axis_name) + assert axis_number is not None - # if we have a multi-index, then need to include - # the levels - if self.is_multi_index: - filt = filt.union(Index(self.levels)) - takers = op(axis_values, filt) - return obj.loc(axis=axis_number)[takers] + # see if the field is the name of an axis + if field == axis_name: - # this might be the name of a file IN an axis - elif field in axis_values: + # if we have a multi-index, then need to include + # the levels + if self.is_multi_index: + filt = filt.union(Index(self.levels)) - # we need to filter on this dimension - values = ensure_index(getattr(obj, field).values) - filt = ensure_index(filt) + takers = op(axis_values, filt) + return obj.loc(axis=axis_number)[takers] - # hack until we support reversed dim flags - if isinstance(obj, DataFrame): - axis_number = 1 - axis_number - takers = op(values, filt) - return obj.loc(axis=axis_number)[takers] + # this might be the name of a file IN an axis + elif field in axis_values: - raise ValueError(f"cannot find the field [{field}] for filtering!") + # we need to filter on this dimension + values = ensure_index(getattr(obj, field).values) + filt = ensure_index(filt) - # apply the selection filters (but keep in the same order) - if selection.filter is not None: - for field, op, filt in selection.filter.format(): - obj = process_filter(field, filt, op) + # hack until we support reversed dim flags + if isinstance(obj, DataFrame): + axis_number = 1 - axis_number + takers = op(values, filt) + return obj.loc(axis=axis_number)[takers] + + raise ValueError(f"cannot find the field [{field}] for filtering!") + + obj = process_filter(field, filt) return obj From 2a681f995c2352c0528d83c8c30a43a2196c7fd0 Mon Sep 17 00:00:00 2001 From: Bill Blum Date: Wed, 2 Nov 2022 04:52:39 -0400 Subject: [PATCH 06/12] Helps to revert to correct version of file. --- pandas/io/pytables.py | 54 +++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1f2bb4c5d21b4..1b9dcd88e134a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4106,43 +4106,43 @@ def process_axes(self, obj, selection: Selection, columns=None) -> DataFrame: for axis, labels in self.non_index_axes: obj = _reindex_axis(obj, axis, labels, columns) - # apply the selection filters (but keep in the same order) - if selection.filter is not None: - for field, op, filt in selection.filter.format(): + def process_filter(field, filt, op): - def process_filter(field, filt): + for axis_name in obj._AXIS_ORDERS: + axis_number = obj._get_axis_number(axis_name) + axis_values = obj._get_axis(axis_name) + assert axis_number is not None - for axis_name in obj._AXIS_ORDERS: - axis_number = obj._get_axis_number(axis_name) - axis_values = obj._get_axis(axis_name) - assert axis_number is not None + # see if the field is the name of an axis + if field == axis_name: - # see if the field is the name of an axis - if field == axis_name: + # if we have a multi-index, then need to include + # the levels + if self.is_multi_index: + filt = filt.union(Index(self.levels)) - # if we have a multi-index, then need to include - # the levels - if self.is_multi_index: - filt = filt.union(Index(self.levels)) + takers = op(axis_values, filt) + return obj.loc(axis=axis_number)[takers] - takers = op(axis_values, filt) - return obj.loc(axis=axis_number)[takers] + # this might be the name of a file IN an axis + elif field in axis_values: - # this might be the name of a file IN an axis - elif field in axis_values: + # we need to filter on this dimension + values = ensure_index(getattr(obj, field).values) + filt = ensure_index(filt) - # we need to filter on this dimension - values = ensure_index(getattr(obj, field).values) - filt = ensure_index(filt) + # hack until we support reversed dim flags + if isinstance(obj, DataFrame): + axis_number = 1 - axis_number - # hack until we support reversed dim flags - if isinstance(obj, DataFrame): - axis_number = 1 - axis_number - takers = op(values, filt) - return obj.loc(axis=axis_number)[takers] + takers = op(values, filt) + return obj.loc(axis=axis_number)[takers] - raise ValueError(f"cannot find the field [{field}] for filtering!") + raise ValueError(f"cannot find the field [{field}] for filtering!") + # apply the selection filters (but keep in the same order) + if selection.filter is not None: + for field, op, filt in selection.filter.format(): obj = process_filter(field, filt) return obj From d4c867418bc4b590c1f1bf33c981e3b1dd58c4a7 Mon Sep 17 00:00:00 2001 From: Bill Blum Date: Wed, 2 Nov 2022 05:05:13 -0400 Subject: [PATCH 07/12] OK, backing slowly away from the git client now. --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1b9dcd88e134a..58a57aaa09033 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4106,7 +4106,7 @@ def process_axes(self, obj, selection: Selection, columns=None) -> DataFrame: for axis, labels in self.non_index_axes: obj = _reindex_axis(obj, axis, labels, columns) - def process_filter(field, filt, op): + def process_filter(field, filt): for axis_name in obj._AXIS_ORDERS: axis_number = obj._get_axis_number(axis_name) From a1f410f6606090e09d6b7269c8e37b07e5637ae9 Mon Sep 17 00:00:00 2001 From: Bill Blum Date: Wed, 2 Nov 2022 05:06:27 -0400 Subject: [PATCH 08/12] OK, backing slowly away from the git client now. --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 58a57aaa09033..9977c78aab89c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4106,7 +4106,7 @@ def process_axes(self, obj, selection: Selection, columns=None) -> DataFrame: for axis, labels in self.non_index_axes: obj = _reindex_axis(obj, axis, labels, columns) - def process_filter(field, filt): + def process_filter(field, filt, op): for axis_name in obj._AXIS_ORDERS: axis_number = obj._get_axis_number(axis_name) @@ -4143,7 +4143,7 @@ def process_filter(field, filt): # apply the selection filters (but keep in the same order) if selection.filter is not None: for field, op, filt in selection.filter.format(): - obj = process_filter(field, filt) + obj = process_filter(field, filt, op) return obj From 7f9de43f1eb24e2817da0959ae141b4122c5bea7 Mon Sep 17 00:00:00 2001 From: Bill <35750915+roadswitcher@users.noreply.github.com> Date: Wed, 2 Nov 2022 05:36:51 -0400 Subject: [PATCH 09/12] Resolve autotyper find --- pandas/core/strings/object_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index ce2ebf568d7d3..13a0bd4b2b4aa 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -381,7 +381,7 @@ def _str_get_dummies(self, sep: str = "|"): dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) - def _isin(test_elements, element): + def _isin(test_elements, element) -> bool: return element in test_elements for i, t in enumerate(tags2): From 9b360fade9cad6acd84140d7f551ebf85858c68d Mon Sep 17 00:00:00 2001 From: William Blum Date: Wed, 2 Nov 2022 08:43:52 -0400 Subject: [PATCH 10/12] Added type hints per reviewer note. --- pandas/core/strings/object_array.py | 2 +- pandas/tests/io/parser/test_c_parser_only.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 13a0bd4b2b4aa..29a182a1c3969 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -381,7 +381,7 @@ def _str_get_dummies(self, sep: str = "|"): dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) - def _isin(test_elements, element) -> bool: + def _isin(test_elements: Series, element: str) -> bool: return element in test_elements for i, t in enumerate(tags2): diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 261da6ecf468f..d1b429bac0c3e 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -176,7 +176,7 @@ def test_precise_conversion(c_parser_only): normal_errors = [] precise_errors = [] - def error(val, actual_val): + def error(val: float, actual_val: Decimal) -> float: return abs(Decimal(f"{val:.100}") - actual_val) # test numbers between 1 and 2 From e5b8d7ae07f0521d71bcff1f1a0dc739b1b7c130 Mon Sep 17 00:00:00 2001 From: Bill <35750915+roadswitcher@users.noreply.github.com> Date: Wed, 2 Nov 2022 09:37:33 -0400 Subject: [PATCH 11/12] Update pandas/core/strings/object_array.py Co-authored-by: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> --- pandas/core/strings/object_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 29a182a1c3969..21e7ede3ed386 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -381,7 +381,7 @@ def _str_get_dummies(self, sep: str = "|"): dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) - def _isin(test_elements: Series, element: str) -> bool: + def _isin(test_elements: str, element: str) -> bool: return element in test_elements for i, t in enumerate(tags2): From fa3c521fe16ffae19f12bd0ac5bc08e08c9b3d01 Mon Sep 17 00:00:00 2001 From: Bill <35750915+roadswitcher@users.noreply.github.com> Date: Wed, 2 Nov 2022 09:37:49 -0400 Subject: [PATCH 12/12] Update pandas/tests/io/parser/test_c_parser_only.py Co-authored-by: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> --- pandas/tests/io/parser/test_c_parser_only.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index d1b429bac0c3e..ec08fb0d60648 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -176,7 +176,7 @@ def test_precise_conversion(c_parser_only): normal_errors = [] precise_errors = [] - def error(val: float, actual_val: Decimal) -> float: + def error(val: float, actual_val: Decimal) -> Decimal: return abs(Decimal(f"{val:.100}") - actual_val) # test numbers between 1 and 2