From f038e7f472efd2918dcd68ade9104cd073740a60 Mon Sep 17 00:00:00 2001 From: Lauta Date: Fri, 9 May 2025 01:24:20 +0200 Subject: [PATCH 1/6] Allow LOAD_FAST to be optimized to LOAD_FAST_BORROW when the loaded reference is unconsumed at the end of a basic block, as long as it's otherwise safe to borrow (not killed and not stored as a local). --- Lib/test/test_peepholer.py | 35 ++++++++++++++++++++++++++--------- Python/flowgraph.c | 2 +- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 47f51f1979faab..475dd37e38747f 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2430,14 +2430,14 @@ def test_optimized(self): ] self.check(insts, expected) - def test_unoptimized_if_unconsumed(self): + def test_optimize_unconsumed_when_is_safe(self): insts = [ ("LOAD_FAST", 0, 1), ("LOAD_FAST", 1, 2), ("POP_TOP", None, 3), ] expected = [ - ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 0, 1), ("LOAD_FAST_BORROW", 1, 2), ("POP_TOP", None, 3), ] @@ -2449,7 +2449,7 @@ def test_unoptimized_if_unconsumed(self): ("POP_TOP", None, 3), ] expected = [ - ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 0, 1), ("NOP", None, 2), ("NOP", None, 3), ] @@ -2509,7 +2509,12 @@ def test_consume_some_inputs_no_outputs(self): ("GET_LEN", None, 2), ("LIST_APPEND", 0, 3), ] - self.check(insts, insts) + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("GET_LEN", None, 2), + ("LIST_APPEND", 0, 3), + ] + self.check(insts, expected) def test_check_exc_match(self): insts = [ @@ -2518,7 +2523,7 @@ def test_check_exc_match(self): ("CHECK_EXC_MATCH", None, 3) ] expected = [ - ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 0, 1), ("LOAD_FAST_BORROW", 1, 2), ("CHECK_EXC_MATCH", None, 3) ] @@ -2537,7 +2542,19 @@ def test_for_iter(self): ("LOAD_CONST", 0, 7), ("RETURN_VALUE", None, 8), ] - self.cfg_optimization_test(insts, insts, consts=[None]) + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + top := self.Label(), + ("FOR_ITER", end := self.Label(), 2), + ("STORE_FAST", 2, 3), + ("JUMP", top, 4), + end, + ("END_FOR", None, 5), + ("POP_TOP", None, 6), + ("LOAD_CONST", 0, 7), + ("RETURN_VALUE", None, 8), + ] + self.cfg_optimization_test(insts, expected, consts=[None]) def test_load_attr(self): insts = [ @@ -2556,7 +2573,7 @@ def test_load_attr(self): ("LOAD_ATTR", 1, 2), ] expected = [ - ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 0, 1), ("LOAD_ATTR", 1, 2), ] self.check(insts, expected) @@ -2586,7 +2603,7 @@ def test_super_attr(self): expected = [ ("LOAD_FAST_BORROW", 0, 1), ("LOAD_FAST_BORROW", 1, 2), - ("LOAD_FAST", 2, 3), + ("LOAD_FAST_BORROW", 2, 3), ("LOAD_SUPER_ATTR", 1, 4), ] self.check(insts, expected) @@ -2603,7 +2620,7 @@ def test_send(self): ("RETURN_VALUE", None, 7) ] expected = [ - ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 0, 1), ("LOAD_FAST_BORROW", 1, 2), ("SEND", end := self.Label(), 3), ("LOAD_CONST", 0, 4), diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 78ef02a911a72b..80b30f8c702e23 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2976,7 +2976,7 @@ optimize_load_fast(cfg_builder *g) // Optimize instructions for (int i = 0; i < block->b_iused; i++) { - if (!instr_flags[i]) { + if (!(instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL))) { cfg_instr *instr = &block->b_instr[i]; switch (instr->i_opcode) { case LOAD_FAST: From 8f38622c2148e060f0191f5e3d971c2e7930005b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 9 May 2025 00:06:53 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst new file mode 100644 index 00000000000000..63d014e4329f8c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-09-00-06-52.gh-issue-133672.y6QwwH.rst @@ -0,0 +1 @@ +The peephole optimizer now converts LOAD_FAST to LOAD_FAST_BORROW more aggressively. This optimization is applied even if the loaded variable remains live on the stack at the end of a basic block, as long as the borrow is otherwise determined to be safe. This can reduce reference counting overhead in certain code patterns, such as with iterables in loops. From 2f0ffdda25a22b0d46a24224ac552392d80e2944 Mon Sep 17 00:00:00 2001 From: Lauta Date: Wed, 14 May 2025 14:56:02 +0200 Subject: [PATCH 3/6] Improve CFG optimization with new LOAD_FAST_BORROW safety checks. --- Python/flowgraph.c | 349 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 340 insertions(+), 9 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 80b30f8c702e23..c795ebb454054b 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -71,6 +71,8 @@ typedef struct _PyCfgBasicblock { unsigned b_cold : 1; /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */ unsigned b_warm : 1; + /* b_dfs_visited_gen is used by the borrow-safe analysis to mark whether a block has been visited */ + int b_dfs_visited_gen; } basicblock; @@ -95,6 +97,8 @@ typedef struct _PyCfgBuilder cfg_builder; #define LOCATION(LNO, END_LNO, COL, END_COL) \ ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)}) +static int cfg_dfs_generation_counter = 0; + static inline int is_block_push(cfg_instr *i) { @@ -2701,6 +2705,267 @@ load_fast_push_block(basicblock ***sp, basicblock *target, } } +/* + * Recursively determine if a borrowed reference remains safe along all CFG paths. + * + * This function performs a Depth-First Search (DFS) starting from 'current_block'. + * It tracks a specific value that was notionally loaded by a LOAD_FAST_BORROW + * instruction and is currently at 'depth_of_value_on_entry' on the operand stack + * (0 being TOS, -1 if already known to be consumed). The 'original_local_idx' + * is the index of the local variable from which this value was borrowed. + * + * A borrow is considered UNSAFE if, along any execution path before the + * borrowed value is consumed from the stack: + * 1. The 'original_local_idx' (the backing store for the borrow) is overwritten + * (e.g., by STORE_FAST, DELETE_FAST on that local). + * 2. The borrowed value itself, when at the top of the stack (depth 0), is + * consumed by an instruction that stores it into any local variable + * (e.g., STORE_FAST). + * 3. The value is not consumed and the CFG path ends (e.g., end of function + * without consumption) or enters a cycle where it's not consumed. + * + * The function uses 'cfg_dfs_generation_counter' in conjunction with + * 'current_block->b_dfs_visited_gen' to detect cycles within the current + * specific DFS traversal, preventing infinite loops and deeming such cyclic + * paths unsafe if the value isn't consumed within the cycle. + * + * Stack depths are tracked by: + * - 'depth_of_value_on_entry': The depth of the tracked item upon entering 'current_block'. + * - 'current_target_depth': The depth of the item as instructions in 'current_block' are processed. + * - 'depth_before_jump_op_effect': When a jump instruction is encountered, this + * is calculated by simulating all prior instructions in 'current_block' to find + * the item's depth *just before* the jump instruction itself has any stack effect. + * This precise depth is then used to calculate the 'target_entry_depth' for the + * recursive call to the jump's target block. + * + * Args: + * current_block: The basic block to start analysis from for this recursive step. + * depth_of_value_on_entry: The depth of the tracked borrowed value from TOS + * (0 = TOS, -1 if already consumed). + * original_local_idx: The index of the local variable backing the borrow. + * g: The CFG builder. + * + * Returns: + * true if the borrow is safe along all paths from this point, false otherwise. + */ +static bool +is_borrow_safe( + basicblock *current_block, + Py_ssize_t depth_of_value_on_entry, + int original_local_idx, + cfg_builder *g) +{ + if (depth_of_value_on_entry == -1) { + return true; + } + + if (current_block->b_dfs_visited_gen == cfg_dfs_generation_counter) { + return false; + } + current_block->b_dfs_visited_gen = cfg_dfs_generation_counter; + + Py_ssize_t current_target_depth = depth_of_value_on_entry; + + for (int i = 0; i < current_block->b_iused; i++) { + cfg_instr *instr = ¤t_block->b_instr[i]; + int opcode = instr->i_opcode; + int oparg = instr->i_oparg; + + // 1. Check if the original local is killed before the value is consumed. + if ((opcode == STORE_FAST || opcode == DELETE_FAST || opcode == LOAD_FAST_AND_CLEAR) && + oparg == original_local_idx) { + return false; + } + + // 2. Simulate stack effect and check for consumption or unsafe store. + stack_effects effects_noj; + if (get_stack_effects(opcode, oparg, 0, &effects_noj) < 0) { + return false; // Error in stack effect calculation + } + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + + if (current_target_depth < num_popped) { + if (opcode == STORE_FAST && current_target_depth == 0) { + // Unsafe: borrowed value was at TOS and is being stored into a local. + return false; + } + return true; // Safely consumed by this instruction. + } + // Value not consumed, update its depth. + current_target_depth = current_target_depth - num_popped + num_pushed; + + // 3. Handle branches (jumps are always last in a basic block). + if (HAS_TARGET(opcode)) { + if (i != current_block->b_iused - 1) { + continue; // Skip jumps that are not the last instruction in the block. + } + bool safe_on_all_branches = true; + + // Calculate item's depth just before this jump instruction's own stack effect. + Py_ssize_t depth_before_jump_op_effect = depth_of_value_on_entry; + for(int k=0; k < i; ++k) { // Iterate instructions before the current jump + cfg_instr *prev_instr_in_block = ¤t_block->b_instr[k]; + // Kill check for intermediate instructions + if ((prev_instr_in_block->i_opcode == STORE_FAST || + prev_instr_in_block->i_opcode == DELETE_FAST || + prev_instr_in_block->i_opcode == LOAD_FAST_AND_CLEAR) && + prev_instr_in_block->i_oparg == original_local_idx) { + return false; + } + stack_effects prev_effects; + if (get_stack_effects(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg, 0, &prev_effects) < 0) { + return false; + } + int prev_popped_val = _PyOpcode_num_popped(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); + if (depth_before_jump_op_effect < prev_popped_val) { // Consumed before jump + if (prev_instr_in_block->i_opcode == STORE_FAST && depth_before_jump_op_effect == 0) { + return false; // Stored into local + } + return true; // Safely consumed before the jump + } + depth_before_jump_op_effect = depth_before_jump_op_effect - prev_popped_val + + _PyOpcode_num_pushed(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); + } + + // Analyze jump target path + stack_effects effects_jump; + if (get_stack_effects(opcode, oparg, 1, &effects_jump) < 0) return false; + int jump_popped_val = _PyOpcode_num_popped(opcode, oparg); + Py_ssize_t target_entry_depth = depth_before_jump_op_effect; + + if (target_entry_depth < jump_popped_val) { // Consumed by the jump op itself + if (opcode == STORE_FAST && target_entry_depth == 0) { + safe_on_all_branches = false; + } + // else: safely consumed by jump operation. + } else { // Not consumed by jump op, recurse on target. + target_entry_depth = target_entry_depth - jump_popped_val + _PyOpcode_num_pushed(opcode, oparg); + if (!is_borrow_safe(instr->i_target, target_entry_depth, original_local_idx, g)) { + safe_on_all_branches = false; + } + } + + // Analyze fallthrough path for conditional jumps, if the jump path was safe. + if (safe_on_all_branches && IS_CONDITIONAL_JUMP_OPCODE(opcode) && current_block->b_next) { + // 'current_target_depth' already reflects the stack state if the jump is *not* taken. + if (!is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g)) { + safe_on_all_branches = false; + } + } + return safe_on_all_branches; + } + } + + // When the instructions finish and the value isn't consumed, check fallthrough. + if (current_block->b_next && BB_HAS_FALLTHROUGH(current_block)) { + return is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g); + } + + // No further path (or no fallthrough) and value is still on stack (current_target_depth is valid). + // This means it's unconsumed at the end of a CFG path. + return false; +} + + +/* + * Initiates a Control Flow Graph (CFG) wide safety check for a borrowed reference. + * + * This function is called when a LOAD_FAST instruction is a candidate for + * LOAD_FAST_BORROW, but its produced value ('REF_UNCONSUMED' flag is set) + * might live beyond the current basic block ('producer_block'). + * + * It determines the immediate successor(s) of the 'producer_block' and the + * stack depth at which the borrowed value (identified by 'original_local_idx' + * and initially at 'depth_of_value_at_producer_end' from TOS) would enter + * these successors. + * + * It then calls 'is_borrow_safe()' for each successor path. The borrow is + * considered globally safe only if 'is_borrow_safe()' returns true for ALL + * possible successor paths. + * + * A new DFS traversal is started by incrementing 'cfg_dfs_generation_counter' + * to ensure that 'is_borrow_safe()' uses a fresh set of visited markers + * ('b_dfs_visited_gen') for its analysis. + * + * Args: + * producer_block: The basic block containing the LOAD_FAST candidate. + * depth_of_value_at_producer_end: The depth of the candidate borrowed value + * from TOS at the end of 'producer_block'. + * (0 = TOS, -1 if already consumed - though + * this function expects a live value). + * original_local_idx: The index of the local variable backing the borrow. + * g: The CFG builder. + * + * Returns: + * true if the borrow is safe across all subsequent CFG paths, false otherwise. + */ +static bool +check_borrow_safety_globally( + basicblock *producer_block, + Py_ssize_t depth_of_value_at_producer_end, + int original_local_idx, + cfg_builder *g) +{ + cfg_dfs_generation_counter++; + bool overall_safety = true; + + cfg_instr *last_instr = basicblock_last_instr(producer_block); + + // If depth is -1, implies value was already consumed or is invalid for tracking. + if (depth_of_value_at_producer_end == -1) { + return false; + } + + if (last_instr && HAS_TARGET(last_instr->i_opcode)) { + stack_effects effects_jump; + if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 1, &effects_jump) < 0) return false; + int jump_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); + Py_ssize_t entry_depth_for_target = depth_of_value_at_producer_end; + + if (entry_depth_for_target < jump_popped) { // Consumed by the jump itself. + if (last_instr->i_opcode == STORE_FAST && entry_depth_for_target == 0) { + overall_safety = false; // Unsafe store of borrowed value. + } + // else: safely consumed. + } else { + entry_depth_for_target = entry_depth_for_target - jump_popped + + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); + if (!is_borrow_safe(last_instr->i_target, entry_depth_for_target, original_local_idx, g)) { + overall_safety = false; + } + } + + // Analyze fallthrough path for conditional jumps, if the jump path was safe. + if (overall_safety && IS_CONDITIONAL_JUMP_OPCODE(last_instr->i_opcode) && producer_block->b_next) { + stack_effects effects_fall; // Stack effect if jump is NOT taken. + if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 0, &effects_fall) < 0) return false; + int fall_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); + Py_ssize_t entry_depth_for_fallthrough = depth_of_value_at_producer_end; + + if (entry_depth_for_fallthrough < fall_popped) { // Consumed by not taking jump. + if (last_instr->i_opcode == STORE_FAST && entry_depth_for_fallthrough == 0) { + overall_safety = false; // Unsafe store. + } + // else: safely consumed. + } else { // Not consumed by not taking jump, recurse on fallthrough. + entry_depth_for_fallthrough = entry_depth_for_fallthrough - fall_popped + + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); + if (!is_borrow_safe(producer_block->b_next, entry_depth_for_fallthrough, original_local_idx, g)) { + overall_safety = false; + } + } + } + } else if (producer_block->b_next && BB_HAS_FALLTHROUGH(producer_block)) { // Standard fallthrough, no jump. + if (!is_borrow_safe(producer_block->b_next, depth_of_value_at_producer_end, original_local_idx, g)) { + overall_safety = false; + } + } else { // No successors (e.g., block ends in RETURN/RAISE) and value still on stack. + overall_safety = false; // Unconsumed at end of a CFG path. + } + return overall_safety; +} + /* * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that * load borrowed references onto the operand stack. @@ -2747,6 +3012,7 @@ optimize_load_fast(cfg_builder *g) basicblock *entryblock = g->g_entryblock; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); + b->b_dfs_visited_gen = 0; } size_t instr_flags_size = max_instrs * sizeof(uint8_t); uint8_t *instr_flags = PyMem_Malloc(instr_flags_size); @@ -2976,17 +3242,82 @@ optimize_load_fast(cfg_builder *g) // Optimize instructions for (int i = 0; i < block->b_iused; i++) { - if (!(instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL))) { - cfg_instr *instr = &block->b_instr[i]; - switch (instr->i_opcode) { - case LOAD_FAST: + cfg_instr *instr = &block->b_instr[i]; + bool is_load_fast_type = (instr->i_opcode == LOAD_FAST || instr->i_opcode == LOAD_FAST_LOAD_FAST); + + if (is_load_fast_type) { + bool killed_or_stored_locally = (instr_flags[i] & (SUPPORT_KILLED | STORED_AS_LOCAL)); + if (killed_or_stored_locally) { + continue; // Definitely cannot borrow due to local block issues + } + + bool unconsumed_in_block = (instr_flags[i] & REF_UNCONSUMED); + bool can_borrow = false; + + if (!unconsumed_in_block) { + can_borrow = true; // Safe by local analysis, consumed in current block + } else { + if (instr->i_opcode == LOAD_FAST) { + int local_idx = instr->i_oparg; + Py_ssize_t depth_from_tos_at_block_end = -1; + // Find the specific item on the `refs` stack (at end of current block simulation) + for (Py_ssize_t k = 0; k < refs.size; k++) { + ref r = ref_stack_at(&refs, k); + if (r.instr == i && r.local == local_idx) { // Match instruction and local + depth_from_tos_at_block_end = refs.size - 1 - k; + break; + } + } + + if (depth_from_tos_at_block_end != -1) { + can_borrow = check_borrow_safety_globally(block, depth_from_tos_at_block_end, local_idx, g); + } else { + // If REF_UNCONSUMED is set but we couldn't find its depth, assume unsafe. + // This can happen if refs.size is 0, yet flag is set. + can_borrow = false; + } + + } else { // LOAD_FAST_LOAD_FAST + can_borrow = true; // Assume true, prove false if any part is unsafe + int local_idx1 = instr->i_oparg >> 4; + int local_idx2 = instr->i_oparg & 15; + Py_ssize_t depth1 = -1, depth2 = -1; + + // Find depths on `refs` stack for both products of this LFLF instruction `i` + for (Py_ssize_t k = 0; k < refs.size; k++) { + ref r = ref_stack_at(&refs, k); + if (r.instr == i) { + Py_ssize_t current_depth = refs.size - 1 - k; + if (r.local == local_idx1 && depth1 == -1) depth1 = current_depth; + else if (r.local == local_idx2 && depth2 == -1) depth2 = current_depth; + } + } + + bool found_any_on_stack = (depth1 != -1 || depth2 != -1); + + if (depth1 != -1) { + if (!check_borrow_safety_globally(block, depth1, local_idx1, g)) { + can_borrow = false; + } + } + if (can_borrow && depth2 != -1) { + if (!check_borrow_safety_globally(block, depth2, local_idx2, g)) { + can_borrow = false; + } + } + + if (unconsumed_in_block && !found_any_on_stack) { + can_borrow = false; + } + } + } + + if (can_borrow) { + if (instr->i_opcode == LOAD_FAST) { instr->i_opcode = LOAD_FAST_BORROW; - break; - case LOAD_FAST_LOAD_FAST: + } else if (instr->i_opcode == LOAD_FAST_LOAD_FAST) { instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; - break; - default: - break; + } } } } From 695a9283e091748c1928b68958e23497c4771cc8 Mon Sep 17 00:00:00 2001 From: Lauta Date: Wed, 14 May 2025 15:30:21 +0200 Subject: [PATCH 4/6] Update test_peepholer.py to reflect the new changes made to flowgraph.c --- Lib/test/test_peepholer.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 475dd37e38747f..8f2df397f00f34 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2430,14 +2430,14 @@ def test_optimized(self): ] self.check(insts, expected) - def test_optimize_unconsumed_when_is_safe(self): + def test_unoptimized_if_unconsumed(self): insts = [ ("LOAD_FAST", 0, 1), ("LOAD_FAST", 1, 2), ("POP_TOP", None, 3), ] expected = [ - ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST", 0, 1), ("LOAD_FAST_BORROW", 1, 2), ("POP_TOP", None, 3), ] @@ -2449,7 +2449,7 @@ def test_optimize_unconsumed_when_is_safe(self): ("POP_TOP", None, 3), ] expected = [ - ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST", 0, 1), ("NOP", None, 2), ("NOP", None, 3), ] @@ -2509,12 +2509,7 @@ def test_consume_some_inputs_no_outputs(self): ("GET_LEN", None, 2), ("LIST_APPEND", 0, 3), ] - expected = [ - ("LOAD_FAST_BORROW", 0, 1), - ("GET_LEN", None, 2), - ("LIST_APPEND", 0, 3), - ] - self.check(insts, expected) + self.check(insts, insts) def test_check_exc_match(self): insts = [ @@ -2523,7 +2518,7 @@ def test_check_exc_match(self): ("CHECK_EXC_MATCH", None, 3) ] expected = [ - ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST", 0, 1), ("LOAD_FAST_BORROW", 1, 2), ("CHECK_EXC_MATCH", None, 3) ] @@ -2573,7 +2568,7 @@ def test_load_attr(self): ("LOAD_ATTR", 1, 2), ] expected = [ - ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST", 0, 1), ("LOAD_ATTR", 1, 2), ] self.check(insts, expected) @@ -2603,7 +2598,7 @@ def test_super_attr(self): expected = [ ("LOAD_FAST_BORROW", 0, 1), ("LOAD_FAST_BORROW", 1, 2), - ("LOAD_FAST_BORROW", 2, 3), + ("LOAD_FAST", 2, 3), ("LOAD_SUPER_ATTR", 1, 4), ] self.check(insts, expected) From a4300a5e6c21b6555cfba4ceb4c5803c49dd8294 Mon Sep 17 00:00:00 2001 From: Lauta Date: Wed, 14 May 2025 16:47:39 +0200 Subject: [PATCH 5/6] Fix linter issues. --- Python/flowgraph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index c795ebb454054b..dd9fbe83962eed 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -3268,7 +3268,7 @@ optimize_load_fast(cfg_builder *g) break; } } - + if (depth_from_tos_at_block_end != -1) { can_borrow = check_borrow_safety_globally(block, depth_from_tos_at_block_end, local_idx, g); } else { @@ -3305,7 +3305,7 @@ optimize_load_fast(cfg_builder *g) can_borrow = false; } } - + if (unconsumed_in_block && !found_any_on_stack) { can_borrow = false; } From 8cba63a366d015156dcfeb7a78f490c92c0f0a0f Mon Sep 17 00:00:00 2001 From: Lauta Date: Wed, 14 May 2025 17:02:28 +0200 Subject: [PATCH 6/6] Improved documentation for is_borrow_safe and check_borrow_safety_globally --- Python/flowgraph.c | 78 +++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index dd9fbe83962eed..5d827c6a90ed46 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2708,7 +2708,7 @@ load_fast_push_block(basicblock ***sp, basicblock *target, /* * Recursively determine if a borrowed reference remains safe along all CFG paths. * - * This function performs a Depth-First Search (DFS) starting from 'current_block'. + * This function performs a DFS starting from 'current_block'. * It tracks a specific value that was notionally loaded by a LOAD_FAST_BORROW * instruction and is currently at 'depth_of_value_on_entry' on the operand stack * (0 being TOS, -1 if already known to be consumed). The 'original_local_idx' @@ -2733,20 +2733,11 @@ load_fast_push_block(basicblock ***sp, basicblock *target, * - 'depth_of_value_on_entry': The depth of the tracked item upon entering 'current_block'. * - 'current_target_depth': The depth of the item as instructions in 'current_block' are processed. * - 'depth_before_jump_op_effect': When a jump instruction is encountered, this - * is calculated by simulating all prior instructions in 'current_block' to find - * the item's depth *just before* the jump instruction itself has any stack effect. - * This precise depth is then used to calculate the 'target_entry_depth' for the - * recursive call to the jump's target block. + * is calculated by simulating all prior instructions in 'current_block' to find + * the item's depth *just before* the jump instruction itself has any stack effect. + * This precise depth is then used to calculate the 'target_entry_depth' for the + * recursive call to the jump's target block. * - * Args: - * current_block: The basic block to start analysis from for this recursive step. - * depth_of_value_on_entry: The depth of the tracked borrowed value from TOS - * (0 = TOS, -1 if already consumed). - * original_local_idx: The index of the local variable backing the borrow. - * g: The CFG builder. - * - * Returns: - * true if the borrow is safe along all paths from this point, false otherwise. */ static bool is_borrow_safe( @@ -2771,40 +2762,34 @@ is_borrow_safe( int opcode = instr->i_opcode; int oparg = instr->i_oparg; - // 1. Check if the original local is killed before the value is consumed. if ((opcode == STORE_FAST || opcode == DELETE_FAST || opcode == LOAD_FAST_AND_CLEAR) && oparg == original_local_idx) { return false; } - // 2. Simulate stack effect and check for consumption or unsafe store. stack_effects effects_noj; if (get_stack_effects(opcode, oparg, 0, &effects_noj) < 0) { - return false; // Error in stack effect calculation + return false; } int num_popped = _PyOpcode_num_popped(opcode, oparg); int num_pushed = _PyOpcode_num_pushed(opcode, oparg); if (current_target_depth < num_popped) { if (opcode == STORE_FAST && current_target_depth == 0) { - // Unsafe: borrowed value was at TOS and is being stored into a local. return false; } - return true; // Safely consumed by this instruction. + return true; } - // Value not consumed, update its depth. current_target_depth = current_target_depth - num_popped + num_pushed; - // 3. Handle branches (jumps are always last in a basic block). if (HAS_TARGET(opcode)) { if (i != current_block->b_iused - 1) { continue; // Skip jumps that are not the last instruction in the block. } bool safe_on_all_branches = true; - // Calculate item's depth just before this jump instruction's own stack effect. Py_ssize_t depth_before_jump_op_effect = depth_of_value_on_entry; - for(int k=0; k < i; ++k) { // Iterate instructions before the current jump + for(int k=0; k < i; ++k) { cfg_instr *prev_instr_in_block = ¤t_block->b_instr[k]; // Kill check for intermediate instructions if ((prev_instr_in_block->i_opcode == STORE_FAST || @@ -2818,11 +2803,11 @@ is_borrow_safe( return false; } int prev_popped_val = _PyOpcode_num_popped(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); - if (depth_before_jump_op_effect < prev_popped_val) { // Consumed before jump + if (depth_before_jump_op_effect < prev_popped_val) { if (prev_instr_in_block->i_opcode == STORE_FAST && depth_before_jump_op_effect == 0) { - return false; // Stored into local + return false; } - return true; // Safely consumed before the jump + return true; } depth_before_jump_op_effect = depth_before_jump_op_effect - prev_popped_val + _PyOpcode_num_pushed(prev_instr_in_block->i_opcode, prev_instr_in_block->i_oparg); @@ -2834,11 +2819,10 @@ is_borrow_safe( int jump_popped_val = _PyOpcode_num_popped(opcode, oparg); Py_ssize_t target_entry_depth = depth_before_jump_op_effect; - if (target_entry_depth < jump_popped_val) { // Consumed by the jump op itself + if (target_entry_depth < jump_popped_val) { if (opcode == STORE_FAST && target_entry_depth == 0) { safe_on_all_branches = false; } - // else: safely consumed by jump operation. } else { // Not consumed by jump op, recurse on target. target_entry_depth = target_entry_depth - jump_popped_val + _PyOpcode_num_pushed(opcode, oparg); if (!is_borrow_safe(instr->i_target, target_entry_depth, original_local_idx, g)) { @@ -2848,7 +2832,6 @@ is_borrow_safe( // Analyze fallthrough path for conditional jumps, if the jump path was safe. if (safe_on_all_branches && IS_CONDITIONAL_JUMP_OPCODE(opcode) && current_block->b_next) { - // 'current_target_depth' already reflects the stack state if the jump is *not* taken. if (!is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g)) { safe_on_all_branches = false; } @@ -2862,14 +2845,16 @@ is_borrow_safe( return is_borrow_safe(current_block->b_next, current_target_depth, original_local_idx, g); } - // No further path (or no fallthrough) and value is still on stack (current_target_depth is valid). - // This means it's unconsumed at the end of a CFG path. + /* + No further path (or no fallthrough) and value is still on stack (current_target_depth is valid). + This means it's unconsumed at the end of a CFG path, so it's unsafe. + */ return false; } /* - * Initiates a Control Flow Graph (CFG) wide safety check for a borrowed reference. + * Initiates a CFG wide safety check for a borrowed reference. * * This function is called when a LOAD_FAST instruction is a candidate for * LOAD_FAST_BORROW, but its produced value ('REF_UNCONSUMED' flag is set) @@ -2888,17 +2873,6 @@ is_borrow_safe( * to ensure that 'is_borrow_safe()' uses a fresh set of visited markers * ('b_dfs_visited_gen') for its analysis. * - * Args: - * producer_block: The basic block containing the LOAD_FAST candidate. - * depth_of_value_at_producer_end: The depth of the candidate borrowed value - * from TOS at the end of 'producer_block'. - * (0 = TOS, -1 if already consumed - though - * this function expects a live value). - * original_local_idx: The index of the local variable backing the borrow. - * g: The CFG builder. - * - * Returns: - * true if the borrow is safe across all subsequent CFG paths, false otherwise. */ static bool check_borrow_safety_globally( @@ -2923,11 +2897,10 @@ check_borrow_safety_globally( int jump_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); Py_ssize_t entry_depth_for_target = depth_of_value_at_producer_end; - if (entry_depth_for_target < jump_popped) { // Consumed by the jump itself. + if (entry_depth_for_target < jump_popped) { if (last_instr->i_opcode == STORE_FAST && entry_depth_for_target == 0) { - overall_safety = false; // Unsafe store of borrowed value. + overall_safety = false; } - // else: safely consumed. } else { entry_depth_for_target = entry_depth_for_target - jump_popped + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); @@ -2938,17 +2911,16 @@ check_borrow_safety_globally( // Analyze fallthrough path for conditional jumps, if the jump path was safe. if (overall_safety && IS_CONDITIONAL_JUMP_OPCODE(last_instr->i_opcode) && producer_block->b_next) { - stack_effects effects_fall; // Stack effect if jump is NOT taken. + stack_effects effects_fall; if (get_stack_effects(last_instr->i_opcode, last_instr->i_oparg, 0, &effects_fall) < 0) return false; int fall_popped = _PyOpcode_num_popped(last_instr->i_opcode, last_instr->i_oparg); Py_ssize_t entry_depth_for_fallthrough = depth_of_value_at_producer_end; - if (entry_depth_for_fallthrough < fall_popped) { // Consumed by not taking jump. + if (entry_depth_for_fallthrough < fall_popped) { if (last_instr->i_opcode == STORE_FAST && entry_depth_for_fallthrough == 0) { - overall_safety = false; // Unsafe store. + overall_safety = false; } - // else: safely consumed. - } else { // Not consumed by not taking jump, recurse on fallthrough. + } else { // Recurse on fallthrough. entry_depth_for_fallthrough = entry_depth_for_fallthrough - fall_popped + _PyOpcode_num_pushed(last_instr->i_opcode, last_instr->i_oparg); if (!is_borrow_safe(producer_block->b_next, entry_depth_for_fallthrough, original_local_idx, g)) { @@ -2960,8 +2932,8 @@ check_borrow_safety_globally( if (!is_borrow_safe(producer_block->b_next, depth_of_value_at_producer_end, original_local_idx, g)) { overall_safety = false; } - } else { // No successors (e.g., block ends in RETURN/RAISE) and value still on stack. - overall_safety = false; // Unconsumed at end of a CFG path. + } else { + overall_safety = false; } return overall_safety; }