Skip to content

Commit d65ba51

Browse files
committed
subprocess's Popen.wait() is now thread safe so that multiple threads
may be calling wait() or poll() on a Popen instance at the same time without losing the Popen.returncode value. Fixes issue #21291.
1 parent 9e59967 commit d65ba51

File tree

3 files changed

+92
-10
lines changed

3 files changed

+92
-10
lines changed

Lib/subprocess.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,10 @@ class STARTUPINFO:
405405
import _posixsubprocess
406406
import select
407407
import selectors
408+
try:
409+
import threading
410+
except ImportError:
411+
import dummy_threading as threading
408412

409413
# When select or poll has indicated that the file is writable,
410414
# we can write up to _PIPE_BUF bytes without risk of blocking.
@@ -748,6 +752,12 @@ def __init__(self, args, bufsize=-1, executable=None,
748752
pass_fds=()):
749753
"""Create new Popen instance."""
750754
_cleanup()
755+
# Held while anything is calling waitpid before returncode has been
756+
# updated to prevent clobbering returncode if wait() or poll() are
757+
# called from multiple threads at once. After acquiring the lock,
758+
# code must re-check self.returncode to see if another thread just
759+
# finished a waitpid() call.
760+
self._waitpid_lock = threading.Lock()
751761

752762
self._input = None
753763
self._communication_started = False
@@ -1450,6 +1460,7 @@ def _execute_child(self, args, executable, preexec_fn, close_fds,
14501460
def _handle_exitstatus(self, sts, _WIFSIGNALED=os.WIFSIGNALED,
14511461
_WTERMSIG=os.WTERMSIG, _WIFEXITED=os.WIFEXITED,
14521462
_WEXITSTATUS=os.WEXITSTATUS):
1463+
"""All callers to this function MUST hold self._waitpid_lock."""
14531464
# This method is called (indirectly) by __del__, so it cannot
14541465
# refer to anything outside of its local scope.
14551466
if _WIFSIGNALED(sts):
@@ -1471,7 +1482,13 @@ def _internal_poll(self, _deadstate=None, _waitpid=os.waitpid,
14711482
14721483
"""
14731484
if self.returncode is None:
1485+
if not self._waitpid_lock.acquire(False):
1486+
# Something else is busy calling waitpid. Don't allow two
1487+
# at once. We know nothing yet.
1488+
return None
14741489
try:
1490+
if self.returncode is not None:
1491+
return self.returncode # Another thread waited.
14751492
pid, sts = _waitpid(self.pid, _WNOHANG)
14761493
if pid == self.pid:
14771494
self._handle_exitstatus(sts)
@@ -1485,10 +1502,13 @@ def _internal_poll(self, _deadstate=None, _waitpid=os.waitpid,
14851502
# can't get the status.
14861503
# http://bugs.python.org/issue15756
14871504
self.returncode = 0
1505+
finally:
1506+
self._waitpid_lock.release()
14881507
return self.returncode
14891508

14901509

14911510
def _try_wait(self, wait_flags):
1511+
"""All callers to this function MUST hold self._waitpid_lock."""
14921512
try:
14931513
(pid, sts) = _eintr_retry_call(os.waitpid, self.pid, wait_flags)
14941514
except OSError as e:
@@ -1521,23 +1541,33 @@ def wait(self, timeout=None, endtime=None):
15211541
# cribbed from Lib/threading.py in Thread.wait() at r71065.
15221542
delay = 0.0005 # 500 us -> initial delay of 1 ms
15231543
while True:
1524-
(pid, sts) = self._try_wait(os.WNOHANG)
1525-
assert pid == self.pid or pid == 0
1526-
if pid == self.pid:
1527-
self._handle_exitstatus(sts)
1528-
break
1544+
if self._waitpid_lock.acquire(False):
1545+
try:
1546+
if self.returncode is not None:
1547+
break # Another thread waited.
1548+
(pid, sts) = self._try_wait(os.WNOHANG)
1549+
assert pid == self.pid or pid == 0
1550+
if pid == self.pid:
1551+
self._handle_exitstatus(sts)
1552+
break
1553+
finally:
1554+
self._waitpid_lock.release()
15291555
remaining = self._remaining_time(endtime)
15301556
if remaining <= 0:
15311557
raise TimeoutExpired(self.args, timeout)
15321558
delay = min(delay * 2, remaining, .05)
15331559
time.sleep(delay)
15341560
else:
15351561
while self.returncode is None:
1536-
(pid, sts) = self._try_wait(0)
1537-
# Check the pid and loop as waitpid has been known to return
1538-
# 0 even without WNOHANG in odd situations. issue14396.
1539-
if pid == self.pid:
1540-
self._handle_exitstatus(sts)
1562+
with self._waitpid_lock:
1563+
if self.returncode is not None:
1564+
break # Another thread waited.
1565+
(pid, sts) = self._try_wait(0)
1566+
# Check the pid and loop as waitpid has been known to
1567+
# return 0 even without WNOHANG in odd situations.
1568+
# http://bugs.python.org/issue14396.
1569+
if pid == self.pid:
1570+
self._handle_exitstatus(sts)
15411571
return self.returncode
15421572

15431573

Lib/test/test_subprocess.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,54 @@ def open_fds():
10521052
if exc is not None:
10531053
raise exc
10541054

1055+
@unittest.skipIf(threading is None, "threading required")
1056+
def test_threadsafe_wait(self):
1057+
"""Issue21291: Popen.wait() needs to be threadsafe for returncode."""
1058+
proc = subprocess.Popen([sys.executable, '-c',
1059+
'import time; time.sleep(12)'])
1060+
self.assertEqual(proc.returncode, None)
1061+
results = []
1062+
1063+
def kill_proc_timer_thread():
1064+
results.append(('thread-start-poll-result', proc.poll()))
1065+
# terminate it from the thread and wait for the result.
1066+
proc.kill()
1067+
proc.wait()
1068+
results.append(('thread-after-kill-and-wait', proc.returncode))
1069+
# this wait should be a no-op given the above.
1070+
proc.wait()
1071+
results.append(('thread-after-second-wait', proc.returncode))
1072+
1073+
# This is a timing sensitive test, the failure mode is
1074+
# triggered when both the main thread and this thread are in
1075+
# the wait() call at once. The delay here is to allow the
1076+
# main thread to most likely be blocked in its wait() call.
1077+
t = threading.Timer(0.2, kill_proc_timer_thread)
1078+
t.start()
1079+
1080+
# Wait for the process to finish; the thread should kill it
1081+
# long before it finishes on its own. Supplying a timeout
1082+
# triggers a different code path for better coverage.
1083+
proc.wait(timeout=20)
1084+
# Should be -9 because of the proc.kill() from the thread.
1085+
self.assertEqual(proc.returncode, -9,
1086+
msg="unexpected result in wait from main thread")
1087+
1088+
# This should be a no-op with no change in returncode.
1089+
proc.wait()
1090+
self.assertEqual(proc.returncode, -9,
1091+
msg="unexpected result in second main wait.")
1092+
1093+
t.join()
1094+
# Ensure that all of the thread results are as expected.
1095+
# When a race condition occurs in wait(), the returncode could
1096+
# be set by the wrong thread that doesn't actually have it
1097+
# leading to an incorrect value.
1098+
self.assertEqual([('thread-start-poll-result', None),
1099+
('thread-after-kill-and-wait', -9),
1100+
('thread-after-second-wait', -9)],
1101+
results)
1102+
10551103
def test_issue8780(self):
10561104
# Ensure that stdout is inherited from the parent
10571105
# if stdout=PIPE is not used

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ Core and Builtins
3939
Library
4040
-------
4141

42+
- Issue #21291: subprocess's Popen.wait() is now thread safe so that
43+
multiple threads may be calling wait() or poll() on a Popen instance
44+
at the same time without losing the Popen.returncode value.
45+
4246
- Issue #21127: Path objects can now be instantiated from str subclass
4347
instances (such as numpy.str_).
4448

0 commit comments

Comments
 (0)