Skip to content

Commit e16ad24

Browse files
jakkdlZac-HD
andauthored
Fix some B023 false alarms (#303)
* Shed fixes * Fix some B023 false alarms * Add regression tests * WIP: failing tests * avoid false alarms with filter, reduce, key= and return. Added tests for functools.reduce Co-authored-by: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
1 parent c7c5c7c commit e16ad24

File tree

3 files changed

+148
-25
lines changed

3 files changed

+148
-25
lines changed

bugbear.py

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def visit_ExceptHandler(self, node):
280280
names = [_to_name_str(e) for e in node.type.elts]
281281
as_ = " as " + node.name if node.name is not None else ""
282282
if len(names) == 0:
283-
vs = ("`except (){}:`".format(as_),)
283+
vs = (f"`except (){as_}:`",)
284284
self.errors.append(B001(node.lineno, node.col_offset, vars=vs))
285285
elif len(names) == 1:
286286
self.errors.append(B013(node.lineno, node.col_offset, vars=names))
@@ -568,7 +568,7 @@ def check_for_b020(self, node):
568568
n = targets.names[name][0]
569569
self.errors.append(B020(n.lineno, n.col_offset, vars=(name,)))
570570

571-
def check_for_b023(self, loop_node):
571+
def check_for_b023(self, loop_node): # noqa: C901
572572
"""Check that functions (including lambdas) do not use loop variables.
573573
574574
https://docs.python-guide.org/writing/gotchas/#late-binding-closures from
@@ -584,26 +584,58 @@ def check_for_b023(self, loop_node):
584584
# implement this "backwards": first we find all the candidate variable
585585
# uses, and then if there are any we check for assignment of those names
586586
# inside the loop body.
587+
safe_functions = []
587588
suspicious_variables = []
588589
for node in ast.walk(loop_node):
589-
if isinstance(node, FUNCTION_NODES):
590+
# check if function is immediately consumed to avoid false alarm
591+
if isinstance(node, ast.Call):
592+
# check for filter&reduce
593+
if (
594+
isinstance(node.func, ast.Name)
595+
and node.func.id in ("filter", "reduce")
596+
) or (
597+
isinstance(node.func, ast.Attribute)
598+
and node.func.attr == "reduce"
599+
and isinstance(node.func.value, ast.Name)
600+
and node.func.value.id == "functools"
601+
):
602+
for arg in node.args:
603+
if isinstance(arg, FUNCTION_NODES):
604+
safe_functions.append(arg)
605+
606+
# check for key=
607+
for keyword in node.keywords:
608+
if keyword.arg == "key" and isinstance(
609+
keyword.value, FUNCTION_NODES
610+
):
611+
safe_functions.append(keyword.value)
612+
613+
if isinstance(node, ast.Return):
614+
if isinstance(node.value, FUNCTION_NODES):
615+
safe_functions.append(node.value)
616+
# TODO: ast.walk(node) and mark all child nodes safe?
617+
618+
if isinstance(node, FUNCTION_NODES) and node not in safe_functions:
590619
argnames = {
591620
arg.arg for arg in ast.walk(node.args) if isinstance(arg, ast.arg)
592621
}
593622
if isinstance(node, ast.Lambda):
594623
body_nodes = ast.walk(node.body)
595624
else:
596625
body_nodes = itertools.chain.from_iterable(map(ast.walk, node.body))
626+
errors = []
597627
for name in body_nodes:
598-
if (
599-
isinstance(name, ast.Name)
600-
and name.id not in argnames
601-
and isinstance(name.ctx, ast.Load)
602-
):
603-
err = B023(name.lineno, name.col_offset, vars=(name.id,))
604-
if err not in self._b023_seen:
605-
self._b023_seen.add(err) # dedupe across nested loops
606-
suspicious_variables.append(err)
628+
if isinstance(name, ast.Name) and name.id not in argnames:
629+
if isinstance(name.ctx, ast.Load):
630+
errors.append(
631+
B023(name.lineno, name.col_offset, vars=(name.id,))
632+
)
633+
elif isinstance(name.ctx, ast.Store):
634+
argnames.add(name.id)
635+
for err in errors:
636+
if err.vars[0] not in argnames and err not in self._b023_seen:
637+
self._b023_seen.add(err) # dedupe across nested loops
638+
suspicious_variables.append(err)
607639

608640
if suspicious_variables:
609641
reassigned_in_loop = set(self._get_assigned_names(loop_node))
@@ -912,7 +944,7 @@ def check_for_b025(self, node):
912944
uniques.add(name)
913945
seen.extend(uniques)
914946
# sort to have a deterministic output
915-
duplicates = sorted(set(x for x in seen if seen.count(x) > 1))
947+
duplicates = sorted({x for x in seen if seen.count(x) > 1})
916948
for duplicate in duplicates:
917949
self.errors.append(B025(node.lineno, node.col_offset, vars=(duplicate,)))
918950

tests/b023.py

Lines changed: 96 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
Should emit:
33
B023 - on lines 12, 13, 16, 28, 29, 30, 31, 40, 42, 50, 51, 52, 53, 61, 68.
44
"""
5+
from functools import reduce
56

67
functions = []
78
z = 0
8-
99
for x in range(3):
1010
y = x + 1
1111
# Subject to late-binding problems
@@ -25,10 +25,10 @@ def f_ok_1(x):
2525

2626

2727
def check_inside_functions_too():
28-
ls = [lambda: x for x in range(2)]
29-
st = {lambda: x for x in range(2)}
30-
gn = (lambda: x for x in range(2))
31-
dt = {x: lambda: x for x in range(2)}
28+
ls = [lambda: x for x in range(2)] # error
29+
st = {lambda: x for x in range(2)} # error
30+
gn = (lambda: x for x in range(2)) # error
31+
dt = {x: lambda: x for x in range(2)} # error
3232

3333

3434
async def pointless_async_iterable():
@@ -37,28 +37,28 @@ async def pointless_async_iterable():
3737

3838
async def container_for_problems():
3939
async for x in pointless_async_iterable():
40-
functions.append(lambda: x)
40+
functions.append(lambda: x) # error
4141

42-
[lambda: x async for x in pointless_async_iterable()]
42+
[lambda: x async for x in pointless_async_iterable()] # error
4343

4444

4545
a = 10
4646
b = 0
4747
while True:
4848
a = a_ = a - 1
4949
b += 1
50-
functions.append(lambda: a)
51-
functions.append(lambda: a_)
52-
functions.append(lambda: b)
53-
functions.append(lambda: c) # not a name error because of late binding!
50+
functions.append(lambda: a) # error
51+
functions.append(lambda: a_) # error
52+
functions.append(lambda: b) # error
53+
functions.append(lambda: c) # error, but not a name error due to late binding
5454
c: bool = a > 3
5555
if not c:
5656
break
5757

5858
# Nested loops should not duplicate reports
5959
for j in range(2):
6060
for k in range(3):
61-
lambda: j * k
61+
lambda: j * k # error
6262

6363

6464
for j, k, l in [(1, 2, 3)]:
@@ -76,3 +76,87 @@ def f():
7676

7777
def explicit_capture(captured=var):
7878
return captured
79+
80+
81+
# `query` is defined in the function, so also defining it in the loop should be OK.
82+
for name in ["a", "b"]:
83+
query = name
84+
85+
def myfunc(x):
86+
query = x
87+
query_post = x
88+
_ = query
89+
_ = query_post
90+
91+
query_post = name # in case iteration order matters
92+
93+
94+
# Bug here because two dict comprehensions reference `name`, one of which is inside
95+
# the lambda. This should be totally fine, of course.
96+
_ = {
97+
k: v
98+
for k, v in reduce(
99+
lambda data, event: merge_mappings(
100+
[data, {name: f(caches, data, event) for name, f in xx}]
101+
),
102+
events,
103+
{name: getattr(group, name) for name in yy},
104+
).items()
105+
if k in backfill_fields
106+
}
107+
108+
109+
# OK to define lambdas if they're immediately consumed, typically as the `key=`
110+
# argument or in a consumed `filter()` (even if a comprehension is better style)
111+
for x in range(2):
112+
# It's not a complete get-out-of-linting-free construct - these should fail:
113+
min([None, lambda: x], key=repr)
114+
sorted([None, lambda: x], key=repr)
115+
any(filter(bool, [None, lambda: x]))
116+
list(filter(bool, [None, lambda: x]))
117+
all(reduce(bool, [None, lambda: x]))
118+
119+
# But all these ones should be OK:
120+
min(range(3), key=lambda y: x * y)
121+
max(range(3), key=lambda y: x * y)
122+
sorted(range(3), key=lambda y: x * y)
123+
124+
any(filter(lambda y: x < y, range(3)))
125+
all(filter(lambda y: x < y, range(3)))
126+
set(filter(lambda y: x < y, range(3)))
127+
list(filter(lambda y: x < y, range(3)))
128+
tuple(filter(lambda y: x < y, range(3)))
129+
sorted(filter(lambda y: x < y, range(3)))
130+
frozenset(filter(lambda y: x < y, range(3)))
131+
132+
any(reduce(lambda y: x | y, range(3)))
133+
all(reduce(lambda y: x | y, range(3)))
134+
set(reduce(lambda y: x | y, range(3)))
135+
list(reduce(lambda y: x | y, range(3)))
136+
tuple(reduce(lambda y: x | y, range(3)))
137+
sorted(reduce(lambda y: x | y, range(3)))
138+
frozenset(reduce(lambda y: x | y, range(3)))
139+
140+
import functools
141+
142+
any(functools.reduce(lambda y: x | y, range(3)))
143+
all(functools.reduce(lambda y: x | y, range(3)))
144+
set(functools.reduce(lambda y: x | y, range(3)))
145+
list(functools.reduce(lambda y: x | y, range(3)))
146+
tuple(functools.reduce(lambda y: x | y, range(3)))
147+
sorted(functools.reduce(lambda y: x | y, range(3)))
148+
frozenset(functools.reduce(lambda y: x | y, range(3)))
149+
150+
# OK because the lambda which references a loop variable is defined in a `return`
151+
# statement, and after we return the loop variable can't be redefined.
152+
# In principle we could do something fancy with `break`, but it's not worth it.
153+
def iter_f(names):
154+
for name in names:
155+
if exists(name):
156+
return lambda: name if exists(name) else None
157+
158+
if foo(name):
159+
return [lambda: name] # false alarm, should be fixed?
160+
161+
if False:
162+
return [lambda: i for i in range(3)] # error

tests/test_bugbear.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,13 @@ def test_b023(self):
351351
B023(61, 16, vars=("j",)),
352352
B023(61, 20, vars=("k",)),
353353
B023(68, 9, vars=("l",)),
354+
B023(113, 23, vars=("x",)),
355+
B023(114, 26, vars=("x",)),
356+
B023(115, 36, vars=("x",)),
357+
B023(116, 37, vars=("x",)),
358+
B023(117, 36, vars=("x",)),
359+
B023(159, 28, vars=("name",)), # false alarm?
360+
B023(162, 28, vars=("i",)),
354361
)
355362
self.assertEqual(errors, expected)
356363

0 commit comments

Comments
 (0)