Skip to content

Commit aaad1d6

Browse files
authored
Implement late-binding loop check (#265)
* Implement late-binding loop check * Dedupe warning with nested loops * Only trigger for Load names * Handle attribute/item assignment * Allow explictly-captured vars i.e. don't complain about the default value of arguments, since that's an explicit solution to late binding!
1 parent 88bb957 commit aaad1d6

File tree

4 files changed

+205
-0
lines changed

4 files changed

+205
-0
lines changed

README.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ No exceptions will be suppressed and therefore this context manager is redundant
150150
N.B. this rule currently does not flag `suppress` calls to avoid potential false
151151
positives due to similarly named user-defined functions.
152152

153+
**B023**: Functions defined inside a loop must not use variables redefined in
154+
the loop, because `late-binding closures are a classic gotcha
155+
<https://docs.python-guide.org/writing/gotchas/#late-binding-closures>`__.
153156

154157
Opinionated warnings
155158
~~~~~~~~~~~~~~~~~~~~

bugbear.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
ast.DictComp,
2727
ast.GeneratorExp,
2828
)
29+
FUNCTION_NODES = (ast.AsyncFunctionDef, ast.FunctionDef, ast.Lambda)
2930

3031
Context = namedtuple("Context", ["node", "stack"])
3132

@@ -198,6 +199,23 @@ def _to_name_str(node):
198199
return _to_name_str(node.value)
199200

200201

202+
def names_from_assignments(assign_target):
203+
if isinstance(assign_target, ast.Name):
204+
yield assign_target.id
205+
elif isinstance(assign_target, ast.Starred):
206+
yield from names_from_assignments(assign_target.value)
207+
elif isinstance(assign_target, (ast.List, ast.Tuple)):
208+
for child in assign_target.elts:
209+
yield from names_from_assignments(child)
210+
211+
212+
def children_in_scope(node):
213+
yield node
214+
if not isinstance(node, FUNCTION_NODES):
215+
for child in ast.iter_child_nodes(node):
216+
yield from children_in_scope(child)
217+
218+
201219
def _typesafe_issubclass(cls, class_or_tuple):
202220
try:
203221
return issubclass(cls, class_or_tuple)
@@ -220,6 +238,7 @@ class BugBearVisitor(ast.NodeVisitor):
220238
contexts = attr.ib(default=attr.Factory(list))
221239

222240
NODE_WINDOW_SIZE = 4
241+
_b023_seen = attr.ib(factory=set, init=False)
223242

224243
if False:
225244
# Useful for tracing what the hell is going on.
@@ -348,6 +367,31 @@ def visit_Assign(self, node):
348367
def visit_For(self, node):
349368
self.check_for_b007(node)
350369
self.check_for_b020(node)
370+
self.check_for_b023(node)
371+
self.generic_visit(node)
372+
373+
def visit_AsyncFor(self, node):
374+
self.check_for_b023(node)
375+
self.generic_visit(node)
376+
377+
def visit_While(self, node):
378+
self.check_for_b023(node)
379+
self.generic_visit(node)
380+
381+
def visit_ListComp(self, node):
382+
self.check_for_b023(node)
383+
self.generic_visit(node)
384+
385+
def visit_SetComp(self, node):
386+
self.check_for_b023(node)
387+
self.generic_visit(node)
388+
389+
def visit_DictComp(self, node):
390+
self.check_for_b023(node)
391+
self.generic_visit(node)
392+
393+
def visit_GeneratorExp(self, node):
394+
self.check_for_b023(node)
351395
self.generic_visit(node)
352396

353397
def visit_Assert(self, node):
@@ -520,6 +564,59 @@ def check_for_b020(self, node):
520564
n = targets.names[name][0]
521565
self.errors.append(B020(n.lineno, n.col_offset, vars=(name,)))
522566

567+
def check_for_b023(self, loop_node):
568+
"""Check that functions (including lambdas) do not use loop variables.
569+
570+
https://docs.python-guide.org/writing/gotchas/#late-binding-closures from
571+
functions - usually but not always lambdas - defined inside a loop are a
572+
classic source of bugs.
573+
574+
For each use of a variable inside a function defined inside a loop, we
575+
emit a warning if that variable is reassigned on each loop iteration
576+
(outside the function). This includes but is not limited to explicit
577+
loop variables like the `x` in `for x in range(3):`.
578+
"""
579+
# Because most loops don't contain functions, it's most efficient to
580+
# implement this "backwards": first we find all the candidate variable
581+
# uses, and then if there are any we check for assignment of those names
582+
# inside the loop body.
583+
suspicious_variables = []
584+
for node in ast.walk(loop_node):
585+
if isinstance(node, FUNCTION_NODES):
586+
argnames = {
587+
arg.arg for arg in ast.walk(node.args) if isinstance(arg, ast.arg)
588+
}
589+
if isinstance(node, ast.Lambda):
590+
body_nodes = ast.walk(node.body)
591+
else:
592+
body_nodes = itertools.chain.from_iterable(map(ast.walk, node.body))
593+
for name in body_nodes:
594+
if (
595+
isinstance(name, ast.Name)
596+
and name.id not in argnames
597+
and isinstance(name.ctx, ast.Load)
598+
):
599+
err = B023(name.lineno, name.col_offset, vars=(name.id,))
600+
if err not in self._b023_seen:
601+
self._b023_seen.add(err) # dedupe across nested loops
602+
suspicious_variables.append(err)
603+
604+
if suspicious_variables:
605+
reassigned_in_loop = set(self._get_assigned_names(loop_node))
606+
607+
for err in sorted(suspicious_variables):
608+
if reassigned_in_loop.issuperset(err.vars):
609+
self.errors.append(err)
610+
611+
def _get_assigned_names(self, loop_node):
612+
loop_targets = (ast.For, ast.AsyncFor, ast.comprehension)
613+
for node in children_in_scope(loop_node):
614+
if isinstance(node, (ast.Assign)):
615+
for child in node.targets:
616+
yield from names_from_assignments(child)
617+
if isinstance(node, loop_targets + (ast.AnnAssign, ast.AugAssign)):
618+
yield from names_from_assignments(node.target)
619+
523620
def check_for_b904(self, node):
524621
"""Checks `raise` without `from` inside an `except` clause.
525622
@@ -1041,6 +1138,8 @@ def visit_Lambda(self, node):
10411138
)
10421139
)
10431140

1141+
B023 = Error(message="B023 Function definition does not bind loop variable {!r}.")
1142+
10441143
# Warnings disabled by default.
10451144
B901 = Error(
10461145
message=(

tests/b023.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""
2+
Should emit:
3+
B023 - on lines 12, 13, 16, 28, 29, 30, 31, 40, 42, 50, 51, 52, 53, 61, 68.
4+
"""
5+
6+
functions = []
7+
z = 0
8+
9+
for x in range(3):
10+
y = x + 1
11+
# Subject to late-binding problems
12+
functions.append(lambda: x)
13+
functions.append(lambda: y) # not just the loop var
14+
15+
def f_bad_1():
16+
return x
17+
18+
# Actually OK
19+
functions.append(lambda x: x * 2)
20+
functions.append(lambda x=x: x)
21+
functions.append(lambda: z) # OK because not assigned in the loop
22+
23+
def f_ok_1(x):
24+
return x * 2
25+
26+
27+
def check_inside_functions_too():
28+
ls = [lambda: x for x in range(2)]
29+
st = {lambda: x for x in range(2)}
30+
gn = (lambda: x for x in range(2))
31+
dt = {x: lambda: x for x in range(2)}
32+
33+
34+
async def pointless_async_iterable():
35+
yield 1
36+
37+
38+
async def container_for_problems():
39+
async for x in pointless_async_iterable():
40+
functions.append(lambda: x)
41+
42+
[lambda: x async for x in pointless_async_iterable()]
43+
44+
45+
a = 10
46+
b = 0
47+
while True:
48+
a = a_ = a - 1
49+
b += 1
50+
functions.append(lambda: a)
51+
functions.append(lambda: a_)
52+
functions.append(lambda: b)
53+
functions.append(lambda: c) # not a name error because of late binding!
54+
c: bool = a > 3
55+
if not c:
56+
break
57+
58+
# Nested loops should not duplicate reports
59+
for j in range(2):
60+
for k in range(3):
61+
lambda: j * k
62+
63+
64+
for j, k, l in [(1, 2, 3)]:
65+
66+
def f():
67+
j = None # OK because it's an assignment
68+
[l for k in range(2)] # error for l, not for k
69+
70+
assert a and functions
71+
72+
a.attribute = 1 # modifying an attribute doesn't make it a loop variable
73+
functions[0] = lambda: None # same for an element
74+
75+
for var in range(2):
76+
77+
def explicit_capture(captured=var):
78+
return captured

tests/test_bugbear.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
B020,
3434
B021,
3535
B022,
36+
B023,
3637
B901,
3738
B902,
3839
B903,
@@ -325,6 +326,30 @@ def test_b022(self):
325326
errors = list(bbc.run())
326327
self.assertEqual(errors, self.errors(B022(8, 0)))
327328

329+
def test_b023(self):
330+
filename = Path(__file__).absolute().parent / "b023.py"
331+
bbc = BugBearChecker(filename=str(filename))
332+
errors = list(bbc.run())
333+
expected = self.errors(
334+
B023(12, 29, vars=("x",)),
335+
B023(13, 29, vars=("y",)),
336+
B023(16, 15, vars=("x",)),
337+
B023(28, 18, vars=("x",)),
338+
B023(29, 18, vars=("x",)),
339+
B023(30, 18, vars=("x",)),
340+
B023(31, 21, vars=("x",)),
341+
B023(40, 33, vars=("x",)),
342+
B023(42, 13, vars=("x",)),
343+
B023(50, 29, vars=("a",)),
344+
B023(51, 29, vars=("a_",)),
345+
B023(52, 29, vars=("b",)),
346+
B023(53, 29, vars=("c",)),
347+
B023(61, 16, vars=("j",)),
348+
B023(61, 20, vars=("k",)),
349+
B023(68, 9, vars=("l",)),
350+
)
351+
self.assertEqual(errors, expected)
352+
328353
def test_b901(self):
329354
filename = Path(__file__).absolute().parent / "b901.py"
330355
bbc = BugBearChecker(filename=str(filename))

0 commit comments

Comments
 (0)