Skip to content

Commit f40da64

Browse files
authored
perf: PyTracer improvements (#1388)
* Cache the bound method of _trace on self. This speeds up pure-Python tracing because we don't have to re-create a bound method object all the time.
* Optimize checking whether a file should be traced. The optimization is based on the following heuristic: in a majority of cases, functions call other functions in the same file. In that situation we don't have to re-check whether we should trace the file.
* Fix the optimization in the presence of contexts.
* Fix a too-long line.
1 parent f9a74c7 commit f40da64

File tree

1 file changed

+41
-24
lines changed

1 file changed

+41
-24
lines changed

coverage/pytracer.py

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ def __init__(self):
6767
# On exit, self.in_atexit = True
6868
atexit.register(setattr, self, 'in_atexit', True)
6969

70+
# cache a bound method on the instance, so that we don't have to
71+
# re-create a bound method object all the time
72+
self._cached_bound_method_trace = self._trace
73+
74+
7075
def __repr__(self):
7176
return "<PyTracer at 0x{:x}: {} lines in {} files>".format(
7277
id(self),
@@ -105,7 +110,7 @@ def _trace(self, frame, event, arg_unused):
105110

106111
#self.log(":", frame.f_code.co_filename, frame.f_lineno, frame.f_code.co_name + "()", event)
107112

108-
if (self.stopped and sys.gettrace() == self._trace): # pylint: disable=comparison-with-callable
113+
if (self.stopped and sys.gettrace() == self._cached_bound_method_trace): # pylint: disable=comparison-with-callable
109114
# The PyTracer.stop() method has been called, possibly by another
110115
# thread, let's deactivate ourselves now.
111116
if 0:
@@ -129,12 +134,13 @@ def _trace(self, frame, event, arg_unused):
129134
context_maybe = self.should_start_context(frame)
130135
if context_maybe is not None:
131136
self.context = context_maybe
132-
self.started_context = True
137+
started_context = True
133138
self.switch_context(self.context)
134139
else:
135-
self.started_context = False
140+
started_context = False
136141
else:
137-
self.started_context = False
142+
started_context = False
143+
self.started_context = started_context
138144

139145
# Entering a new frame. Decide if we should trace in this file.
140146
self._activity = True
@@ -143,23 +149,33 @@ def _trace(self, frame, event, arg_unused):
143149
self.cur_file_data,
144150
self.cur_file_name,
145151
self.last_line,
146-
self.started_context,
152+
started_context,
147153
)
148154
)
155+
156+
# Improve tracing performance: when calling a function, both caller
157+
# and callee are often within the same file. If that's the case, we
158+
# don't have to re-check whether to trace the corresponding
159+
# function (which is a little bit expensive since it involves
160+
# dictionary lookups). This optimization is only correct if we
161+
# didn't start a context.
149162
filename = frame.f_code.co_filename
150-
self.cur_file_name = filename
151-
disp = self.should_trace_cache.get(filename)
152-
if disp is None:
153-
disp = self.should_trace(filename, frame)
154-
self.should_trace_cache[filename] = disp
155-
156-
self.cur_file_data = None
157-
if disp.trace:
158-
tracename = disp.source_filename
159-
if tracename not in self.data:
160-
self.data[tracename] = set()
161-
self.cur_file_data = self.data[tracename]
162-
else:
163+
if filename != self.cur_file_name or started_context:
164+
self.cur_file_name = filename
165+
disp = self.should_trace_cache.get(filename)
166+
if disp is None:
167+
disp = self.should_trace(filename, frame)
168+
self.should_trace_cache[filename] = disp
169+
170+
self.cur_file_data = None
171+
if disp.trace:
172+
tracename = disp.source_filename
173+
if tracename not in self.data:
174+
self.data[tracename] = set()
175+
self.cur_file_data = self.data[tracename]
176+
else:
177+
frame.f_trace_lines = False
178+
elif not self.cur_file_data:
163179
frame.f_trace_lines = False
164180

165181
# The call event is really a "start frame" event, and happens for
@@ -225,7 +241,7 @@ def _trace(self, frame, event, arg_unused):
225241
if self.started_context:
226242
self.context = None
227243
self.switch_context(None)
228-
return self._trace
244+
return self._cached_bound_method_trace
229245

230246
def start(self):
231247
"""Start this Tracer.
@@ -243,10 +259,10 @@ def start(self):
243259
# function, but we are marked as running again, so maybe it
244260
# will be ok?
245261
#self.log("~", "starting on different threads")
246-
return self._trace
262+
return self._cached_bound_method_trace
247263

248-
sys.settrace(self._trace)
249-
return self._trace
264+
sys.settrace(self._cached_bound_method_trace)
265+
return self._cached_bound_method_trace
250266

251267
def stop(self):
252268
"""Stop this Tracer."""
@@ -271,9 +287,10 @@ def stop(self):
271287
# so don't warn if we are in atexit on PyPy and the trace function
272288
# has changed to None.
273289
dont_warn = (env.PYPY and env.PYPYVERSION >= (5, 4) and self.in_atexit and tf is None)
274-
if (not dont_warn) and tf != self._trace: # pylint: disable=comparison-with-callable
290+
if (not dont_warn) and tf != self._cached_bound_method_trace: # pylint: disable=comparison-with-callable
275291
self.warn(
276-
f"Trace function changed, data is likely wrong: {tf!r} != {self._trace!r}",
292+
f"Trace function changed, data is likely wrong: "
293+
f"{tf!r} != {self._cached_bound_method_trace!r}",
277294
slug="trace-changed",
278295
)
279296

0 commit comments

Comments
 (0)