From 90d38d7611fc5853c66573565055c2b0ef99d0c9 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Wed, 21 May 2025 21:32:45 +0200 Subject: [PATCH 1/9] Optimize _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW --- Include/internal/pycore_uop_ids.h | 91 ++++++++++---------- Include/internal/pycore_uop_metadata.h | 26 +++++- Lib/test/test_capi/test_opt.py | 21 ++++- Python/bytecodes.c | 42 +++++++++- Python/executor_cases.c.h | 110 +++++++++++++++++++++++++ Python/optimizer_analysis.c | 34 ++++++++ Python/optimizer_bytecodes.c | 8 ++ Python/optimizer_cases.c.h | 38 +++++++++ 8 files changed, 317 insertions(+), 53 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index d6c2ba59db1eda..d08799487fda42 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -265,75 +265,80 @@ extern "C" { #define _MONITOR_JUMP_BACKWARD 483 #define _MONITOR_RESUME 484 #define _NOP NOP -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 485 +#define _POP_CALL 485 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486 +#define _POP_CALL_ONE 487 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488 +#define _POP_CALL_TWO 489 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490 #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 486 -#define _POP_JUMP_IF_TRUE 487 +#define _POP_JUMP_IF_FALSE 491 +#define _POP_JUMP_IF_TRUE 492 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 488 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 489 -#define _POP_TWO 490 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 491 +#define _POP_TOP_LOAD_CONST_INLINE 493 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494 +#define _POP_TWO 495 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 492 +#define _PUSH_FRAME 497 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 493 -#define _PY_FRAME_GENERAL 494 -#define _PY_FRAME_KW 495 -#define _QUICKEN_RESUME 496 -#define _REPLACE_WITH_TRUE 497 +#define _PUSH_NULL_CONDITIONAL 498 +#define _PY_FRAME_GENERAL 499 +#define _PY_FRAME_KW 500 +#define _QUICKEN_RESUME 501 +#define _REPLACE_WITH_TRUE 502 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 498 -#define _SEND 499 -#define _SEND_GEN_FRAME 500 +#define _SAVE_RETURN_OFFSET 503 +#define _SEND 504 +#define _SEND_GEN_FRAME 505 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 501 -#define _STORE_ATTR 502 -#define _STORE_ATTR_INSTANCE_VALUE 503 -#define _STORE_ATTR_SLOT 504 -#define _STORE_ATTR_WITH_HINT 505 +#define _START_EXECUTOR 506 +#define _STORE_ATTR 507 +#define _STORE_ATTR_INSTANCE_VALUE 508 +#define _STORE_ATTR_SLOT 509 +#define _STORE_ATTR_WITH_HINT 510 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 506 -#define _STORE_FAST_0 507 -#define _STORE_FAST_1 508 -#define _STORE_FAST_2 509 -#define _STORE_FAST_3 510 -#define _STORE_FAST_4 511 -#define _STORE_FAST_5 512 -#define _STORE_FAST_6 513 -#define _STORE_FAST_7 514 +#define _STORE_FAST 511 +#define _STORE_FAST_0 512 +#define _STORE_FAST_1 513 +#define _STORE_FAST_2 514 +#define _STORE_FAST_3 515 +#define _STORE_FAST_4 516 +#define _STORE_FAST_5 517 +#define _STORE_FAST_6 518 +#define _STORE_FAST_7 519 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 515 -#define _STORE_SUBSCR 516 -#define _STORE_SUBSCR_DICT 517 -#define _STORE_SUBSCR_LIST_INT 518 +#define _STORE_SLICE 520 +#define _STORE_SUBSCR 521 +#define _STORE_SUBSCR_DICT 522 +#define _STORE_SUBSCR_LIST_INT 523 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 519 -#define _TO_BOOL 520 +#define _TIER2_RESUME_CHECK 524 +#define _TO_BOOL 525 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 521 +#define _TO_BOOL_LIST 526 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 522 +#define _TO_BOOL_STR 527 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 523 -#define _UNPACK_SEQUENCE_LIST 524 -#define _UNPACK_SEQUENCE_TUPLE 525 -#define _UNPACK_SEQUENCE_TWO_TUPLE 526 +#define _UNPACK_SEQUENCE 528 +#define _UNPACK_SEQUENCE_LIST 529 +#define _UNPACK_SEQUENCE_TUPLE 530 +#define _UNPACK_SEQUENCE_TWO_TUPLE 531 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 526 +#define MAX_UOP_ID 531 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 725238228a3dbc..5ebe124983b862 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -303,9 +303,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, - [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_CALL] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO] = HAS_ESCAPES_FLAG, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, [_LOAD_CONST_UNDER_INLINE] = 0, [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, @@ -557,6 +562,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_MAYBE_EXPAND_METHOD] = "_MAYBE_EXPAND_METHOD", [_MAYBE_EXPAND_METHOD_KW] = "_MAYBE_EXPAND_METHOD_KW", [_NOP] = "_NOP", + [_POP_CALL] = "_POP_CALL", + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_ONE] = "_POP_CALL_ONE", + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_TWO] = "_POP_CALL_TWO", [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_TOP] = "_POP_TOP", @@ -1194,10 +1204,20 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _LOAD_CONST_INLINE_BORROW: return 0; + case _POP_CALL: + return 2; + case _POP_CALL_ONE: + return 3; + case _POP_CALL_TWO: + return 4; case _POP_TOP_LOAD_CONST_INLINE_BORROW: return 1; case _POP_TWO_LOAD_CONST_INLINE_BORROW: return 2; + case _POP_CALL_LOAD_CONST_INLINE_BORROW: + return 2; + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: + return 3; case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: return 4; case _LOAD_CONST_UNDER_INLINE: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0bc0e1b212b6b8..f19669f69512a9 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2002,7 +2002,11 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_GUARD_THIRD_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_ISINSTANCE", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + def test_call_list_append(self): def testfunc(n): @@ -2035,7 +2039,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_is_false(self): def testfunc(n): @@ -2053,7 +2060,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_FALSE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_subclass(self): def testfunc(n): @@ -2071,7 +2081,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_unknown_object(self): def testfunc(n): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 652bda9c182e49..be22c7446f5402 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5287,18 +5287,54 @@ dummy_func( value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { + tier2 op(_POP_CALL, (callable, null --)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_CALL_ONE, (callable, null, pop --)) { + PyStackRef_CLOSE(pop); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_CALL_TWO, (callable, null, pop1, pop2 --)) { + PyStackRef_CLOSE(pop2); + PyStackRef_CLOSE(pop1); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { PyStackRef_CLOSE(pop); value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { + tier2 op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { + tier2 op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null -- value)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop -- value)) { + PyStackRef_CLOSE(pop); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); (void)null; // Silence compiler warnings about unused variables diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fcde31a30126a4..40090e692e4a72 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7030,6 +7030,69 @@ break; } + case _POP_CALL: { + _PyStackRef null; + _PyStackRef callable; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_ONE: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_TWO: { + _PyStackRef pop2; + _PyStackRef pop1; + _PyStackRef null; + _PyStackRef callable; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + null = stack_pointer[-3]; + callable = stack_pointer[-4]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop2); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop1); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { _PyStackRef pop; _PyStackRef value; @@ -7071,6 +7134,53 @@ break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { _PyStackRef pop2; _PyStackRef pop1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5c50228a13b2d1..c129177c4fdd03 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -552,11 +552,13 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_TWO, }; const bool op_skip[MAX_UOP_ID + 1] = { [_NOP] = true, [_CHECK_VALIDITY] = true, + [_SET_IP] = true, }; const uint16_t op_without_pop[MAX_UOP_ID + 1] = { @@ -565,6 +567,14 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, [_POP_TWO] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO] = _POP_CALL_ONE, + [_POP_CALL_ONE] = _POP_CALL, + // The following two instructions are handled separately in remove_unneeded_uops. + // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. + // [_POP_CALL] = _POP_TOP, + // [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; @@ -601,6 +611,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP + again: while (op_without_pop[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { @@ -616,6 +627,29 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) pc = last - buffer; } } + // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. + // This looks for a preceding _PUSH_NULL instruction and simplifies to _POP_TOP. + if (opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { + _PyUOpInstruction *last = &buffer[pc - 1]; + while (op_skip[last->opcode]) { + last--; + } + if (last->opcode == _PUSH_NULL) { + last->opcode = _NOP; + if (opcode == _POP_CALL) { + opcode = buffer[pc].opcode = _POP_TOP; + } + else { + opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; + } + opcode = buffer[pc].opcode = _POP_TOP; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; + } + goto again; + } + } /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ bool needs_ip = opcode == _PUSH_FRAME; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 0b6bbd133d6ac9..49c6bfb6c1b01a 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -551,6 +551,14 @@ dummy_func(void) { value = sym_new_const(ctx, ptr); } + op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused -- value)) { + value = sym_new_const(ctx, ptr); + } + + op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused -- value)) { + value = sym_new_const(ctx, ptr); + } + op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused, unused -- value)) { value = sym_new_const(ctx, ptr); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5a9fcf3b1b6924..bf7ac72d4579e7 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2601,6 +2601,24 @@ break; } + case _POP_CALL: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_TWO: { + stack_pointer += -4; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { JitOptSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; @@ -2618,6 +2636,26 @@ break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + JitOptSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = sym_new_const(ctx, ptr); + stack_pointer[-2] = value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + JitOptSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = sym_new_const(ctx, ptr); + stack_pointer[-3] = value; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { JitOptSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; From 0e66db44fb0f0900bd98d92b8ebfac9e24d9a5e3 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 17:49:46 +0200 Subject: [PATCH 2/9] Add news entry --- .../2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst new file mode 100644 index 00000000000000..fdb6a2fe0b383b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst @@ -0,0 +1 @@ +Optimize ``_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW``. From 0e1030bd1bf9d131530f4446f6e81696f87499b4 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 17:54:10 +0200 Subject: [PATCH 3/9] Trailing whitespace --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index c129177c4fdd03..938e0a9583f4f2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -572,7 +572,7 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_CALL_TWO] = _POP_CALL_ONE, [_POP_CALL_ONE] = _POP_CALL, // The following two instructions are handled separately in remove_unneeded_uops. - // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. + // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. // [_POP_CALL] = _POP_TOP, // [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; From af8864687ac5bf1c8e717d1a8c2d7df88631230a Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:02:12 +0200 Subject: [PATCH 4/9] Simplify logic --- Python/optimizer_analysis.c | 39 +++++++++++++------------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 938e0a9583f4f2..061b4c3fa20060 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -611,30 +611,23 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - again: - while (op_without_pop[opcode]) { + while (op_without_pop[opcode] || opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; } - if (!op_without_push[last->opcode]) { - break; - } - last->opcode = op_without_push[last->opcode]; - opcode = buffer[pc].opcode = op_without_pop[opcode]; - if (op_without_pop[last->opcode]) { - opcode = last->opcode; - pc = last - buffer; - } - } - // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. - // This looks for a preceding _PUSH_NULL instruction and simplifies to _POP_TOP. - if (opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { - _PyUOpInstruction *last = &buffer[pc - 1]; - while (op_skip[last->opcode]) { - last--; + if (op_without_push[last->opcode]) { + last->opcode = op_without_push[last->opcode]; + opcode = buffer[pc].opcode = op_without_pop[opcode]; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; + } } - if (last->opcode == _PUSH_NULL) { + else if (last->opcode == _PUSH_NULL) { + // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. + // This looks for a preceding _PUSH_NULL instruction and + // simplifies to _POP_TOP(_LOAD_CONST_INLINE_BORROW). last->opcode = _NOP; if (opcode == _POP_CALL) { opcode = buffer[pc].opcode = _POP_TOP; @@ -642,12 +635,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) else { opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; } - opcode = buffer[pc].opcode = _POP_TOP; - if (op_without_pop[last->opcode]) { - opcode = last->opcode; - pc = last - buffer; - } - goto again; + } else { + break; } } /* _PUSH_FRAME doesn't escape or error, but it From 3ed47d286f1d5649022b08acc63ef8e04a3f33f7 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:05:17 +0200 Subject: [PATCH 5/9] Wrap line --- Python/optimizer_analysis.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 061b4c3fa20060..5b2923a54ae9b7 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -611,7 +611,9 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - while (op_without_pop[opcode] || opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { + while (op_without_pop[opcode] || + opcode == _POP_CALL || + opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; From bf2786780efae4c9401f7873e120467f132c37be Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:06:40 +0200 Subject: [PATCH 6/9] PEP7 --- Python/optimizer_analysis.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5b2923a54ae9b7..451c633ac1fe37 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -613,7 +613,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _NOP + _POP_TOP + _NOP while (op_without_pop[opcode] || opcode == _POP_CALL || - opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { + opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) + { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; From 2f90609602b0de1894fd43e31d7030808132e6c6 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:07:27 +0200 Subject: [PATCH 7/9] PEP7 vol. 2 --- Python/optimizer_analysis.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 451c633ac1fe37..aa50831af959cf 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -638,7 +638,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) else { opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; } - } else { + } + else { break; } } From 5db29d8974bfc665db5ea081b880917ffeb23979 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:11:21 +0200 Subject: [PATCH 8/9] Simplify logic --- Python/optimizer_analysis.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index aa50831af959cf..99e25591b51892 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -571,10 +571,11 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_LOAD_CONST_INLINE_BORROW, [_POP_CALL_TWO] = _POP_CALL_ONE, [_POP_CALL_ONE] = _POP_CALL, - // The following two instructions are handled separately in remove_unneeded_uops. - // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. - // [_POP_CALL] = _POP_TOP, - // [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, +}; + +const uint16_t op_without_pop_null[MAX_UOP_ID + 1] = { + [_POP_CALL] = _POP_TOP, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; @@ -611,9 +612,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - while (op_without_pop[opcode] || - opcode == _POP_CALL || - opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) + while (op_without_pop[opcode] || op_without_pop_null[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { @@ -632,12 +631,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // This looks for a preceding _PUSH_NULL instruction and // simplifies to _POP_TOP(_LOAD_CONST_INLINE_BORROW). last->opcode = _NOP; - if (opcode == _POP_CALL) { - opcode = buffer[pc].opcode = _POP_TOP; - } - else { - opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; - } + opcode = buffer[pc].opcode = op_without_pop_null[opcode]; + assert(opcode); } else { break; From efbded1d012b82e148f62e8598586ccbdc5f6bdf Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:12:40 +0200 Subject: [PATCH 9/9] Style fixes --- Lib/test/test_capi/test_opt.py | 1 - Python/optimizer_analysis.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f19669f69512a9..cb6eae484149ee 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2007,7 +2007,6 @@ def testfunc(n): self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) - def test_call_list_append(self): def testfunc(n): a = [] diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 99e25591b51892..851e1efa0497af 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -612,8 +612,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - while (op_without_pop[opcode] || op_without_pop_null[opcode]) - { + while (op_without_pop[opcode] || op_without_pop_null[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--;