Skip to content

Commit 586153b

Browse files
committed
Update IR
IR commit: 2c5b63d4b89b25a323444f6629e6d7c9e9d17300
1 parent 250b160 commit 586153b

File tree

7 files changed: 111 additions (+111) and 36 deletions (-36)

ext/opcache/jit/ir/gen_ir_fold_hash.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@ void print_hash(uint32_t *mask, uint32_t count)
2525
{
2626
uint32_t i;
2727

28-
printf("static const uint32_t _ir_fold_hash[%d] = {\n", count);
28+
printf("static const uint32_t _ir_fold_hash[%d] = {\n", count + 1);
2929
for (i = 0; i < count; i++) {
3030
printf("\t0x%08x,\n", mask[i]);
3131
}
32+
printf("\t0x%08x\n", 0);
3233
printf("};\n\n");
3334
}
3435

ext/opcache/jit/ir/ir.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,21 @@ extern "C" {
3636
# endif
3737
#endif
3838

39+
/* target auto detection */
40+
#if !defined(IR_TARGET_X86) && !defined(IR_TARGET_X64) && !defined(IR_TARGET_AARCH64)
41+
# if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
42+
# define IR_TARGET_X64
43+
# elif defined(i386) || defined(__i386) || defined(__i386__) || defined(_M_IX86)
44+
# define IR_TARGET_X86
45+
# elif defined(__aarch64__) || defined(_M_ARM64)
46+
# define IR_TARGET_AARCH64
47+
# elif defined (_WIN64)
48+
# define IR_TARGET_X64
49+
# elif defined (_WIN32)
50+
# define IR_TARGET_X86
51+
# endif
52+
#endif
53+
3954
#if defined(IR_TARGET_X86)
4055
# define IR_TARGET "x86"
4156
#elif defined(IR_TARGET_X64)

ext/opcache/jit/ir/ir_aarch64.dasc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
|.globals ir_lb
1212
|.section code, cold_code, rodata, jmp_table
1313

14+
|.define IR_LOOP_ALIGNMENT, 8
15+
1416
#ifdef IR_DEBUG
1517
typedef struct _ir_mem {uint64_t v;} ir_mem;
1618

@@ -5700,6 +5702,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
57005702
if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
57015703
continue;
57025704
}
5705+
if (bb->flags & IR_BB_ALIGN_LOOP) {
5706+
| .align IR_LOOP_ALIGNMENT
5707+
}
57035708
|=>b:
57045709

57055710
i = bb->start;

ext/opcache/jit/ir/ir_cfg.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2359,7 +2359,19 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
23592359
#endif
23602360
}
23612361

2362-
/* 5. Group chains according to the most frequent edge between them */
2362+
/* 5. Align loop headers */
2363+
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
2364+
if (chains[b].head == b) {
2365+
bb = &ctx->cfg_blocks[b];
2366+
if (bb->loop_depth) {
2367+
if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) {
2368+
bb->flags |= IR_BB_ALIGN_LOOP;
2369+
}
2370+
}
2371+
}
2372+
}
2373+
2374+
/* 6. Group chains according to the most frequent edge between them */
23632375
// TODO: Try to find a better heuristic
23642376
for (e = edges, i = edges_count; i > 0; e++, i--) {
23652377
#if !IR_DEBUG_BB_SCHEDULE_GRAPH
@@ -2380,7 +2392,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
23802392
ir_dump_chains(ctx, chains);
23812393
#endif
23822394

2383-
/* 6. Form a final BB order */
2395+
/* 7. Form a final BB order */
23842396
count = 0;
23852397
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
23862398
if (chains[b].head == b) {

ext/opcache/jit/ir/ir_private.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,8 @@ bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use);
953953
#define IR_BB_HAS_PARAM (1<<12)
954954
#define IR_BB_HAS_VAR (1<<13)
955955

956+
/* The following flags are set by BB scheduler */
957+
#define IR_BB_ALIGN_LOOP (1<<14)
956958

957959
struct _ir_block {
958960
uint32_t flags;

ext/opcache/jit/ir/ir_ra.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3926,7 +3926,6 @@ static void assign_regs(ir_ctx *ctx)
39263926
}
39273927
}
39283928
if (use_pos->hint_ref < 0
3929-
&& ctx->use_lists[-use_pos->hint_ref].count > 1
39303929
&& (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
39313930
if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
39323931
reg |= IR_REG_SPILL_SPECIAL;
@@ -3950,7 +3949,6 @@ static void assign_regs(ir_ctx *ctx)
39503949
reg = IR_REG_NONE;
39513950
}
39523951
} else if (use_pos->hint_ref < 0
3953-
&& ctx->use_lists[-use_pos->hint_ref].count > 1
39543952
&& (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
39553953
if (reg != old_reg) {
39563954
IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);

ext/opcache/jit/ir/ir_x86.dasc

Lines changed: 73 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
|.globals ir_lb
1616
|.section code, cold_code, rodata, jmp_table
1717

18+
|.define IR_LOOP_ALIGNMENT, 16
19+
1820
#ifdef IR_DEBUG
1921
typedef struct _ir_mem {uint64_t v;} ir_mem;
2022

@@ -1402,6 +1404,7 @@ op2_const:
14021404
case IR_TRUNC:
14031405
case IR_BITCAST:
14041406
case IR_PROTO:
1407+
case IR_FP2FP:
14051408
flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
14061409
break;
14071410
case IR_ABS_INT:
@@ -1771,7 +1774,7 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
17711774
} else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) ||
17721775
(ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_NEG_32BIT(op2_insn->val.i64))) {
17731776
lea:
1774-
if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) {
1777+
if (ctx->use_lists[insn->op1].count == 1) {
17751778
uint32_t rule = ctx->rules[insn->op1];
17761779

17771780
if (!rule) {
@@ -1814,14 +1817,14 @@ lea:
18141817
}
18151818
} else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) {
18161819
if (insn->op1 != insn->op2) {
1817-
if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) {
1820+
if (ctx->use_lists[insn->op1].count == 1) {
18181821
uint32_t rule =ctx->rules[insn->op1];
18191822
if (!rule) {
18201823
ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1);
18211824
}
18221825
if (rule == IR_LEA_OB) {
18231826
ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB;
1824-
if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
1827+
if (ctx->use_lists[insn->op2].count == 1) {
18251828
rule = ctx->rules[insn->op2];
18261829
if (!rule) {
18271830
ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
@@ -1836,7 +1839,7 @@ lea:
18361839
return IR_LEA_OB_I;
18371840
} else if (rule == IR_LEA_SI) {
18381841
ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI;
1839-
if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
1842+
if (ctx->use_lists[insn->op2].count == 1) {
18401843
rule = ctx->rules[insn->op2];
18411844
if (!rule) {
18421845
ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
@@ -1851,7 +1854,7 @@ lea:
18511854
return IR_LEA_SI_B;
18521855
}
18531856
}
1854-
if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
1857+
if (ctx->use_lists[insn->op2].count == 1) {
18551858
uint32_t rule = ctx->rules[insn->op2];
18561859
if (!rule) {
18571860
ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
@@ -3227,43 +3230,48 @@ static void ir_emit_prologue(ir_ctx *ctx)
32273230
{
32283231
ir_backend_data *data = ctx->data;
32293232
dasm_State **Dst = &data->dasm_state;
3233+
int offset = ctx->stack_frame_size + ctx->call_stack_size;
32303234

32313235
if (ctx->flags & IR_USE_FRAME_POINTER) {
32323236
| push Ra(IR_REG_RBP)
32333237
| mov Ra(IR_REG_RBP), Ra(IR_REG_RSP)
32343238
}
3239+
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
3240+
int i;
3241+
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
3242+
3243+
for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
3244+
if (IR_REGSET_IN(used_preserved_regs, i)) {
3245+
offset -= sizeof(void*);
3246+
| push Ra(i)
3247+
}
3248+
}
3249+
}
32353250
if (ctx->stack_frame_size + ctx->call_stack_size) {
32363251
if (ctx->fixed_stack_red_zone) {
32373252
IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
3238-
} else {
3239-
| sub Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size)
3253+
} else if (offset) {
3254+
| sub Ra(IR_REG_RSP), offset
32403255
}
32413256
}
3242-
if (ctx->used_preserved_regs) {
3257+
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
32433258
ir_reg fp;
3244-
int offset;
3245-
uint32_t i;
3246-
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
3259+
int i;
3260+
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP);
32473261

32483262
if (ctx->flags & IR_USE_FRAME_POINTER) {
32493263
fp = IR_REG_FRAME_POINTER;
3250-
offset = 0;
3264+
offset -= ctx->stack_frame_size + ctx->call_stack_size;
32513265
} else {
32523266
fp = IR_REG_STACK_POINTER;
3253-
offset = ctx->stack_frame_size + ctx->call_stack_size;
32543267
}
3255-
for (i = 0; i < IR_REG_NUM; i++) {
3268+
for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
32563269
if (IR_REGSET_IN(used_preserved_regs, i)) {
3257-
if (i < IR_REG_FP_FIRST) {
3258-
offset -= sizeof(void*);
3259-
| mov aword [Ra(fp)+offset], Ra(i)
3270+
offset -= sizeof(void*);
3271+
if (ctx->mflags & IR_X86_AVX) {
3272+
| vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
32603273
} else {
3261-
offset -= sizeof(void*);
3262-
if (ctx->mflags & IR_X86_AVX) {
3263-
| vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
3264-
} else {
3265-
| movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
3266-
}
3274+
| movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
32673275
}
32683276
}
32693277
}
@@ -3330,26 +3338,24 @@ static void ir_emit_epilogue(ir_ctx *ctx)
33303338
ir_backend_data *data = ctx->data;
33313339
dasm_State **Dst = &data->dasm_state;
33323340

3333-
if (ctx->used_preserved_regs) {
3341+
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
3342+
int i;
33343343
int offset;
3335-
uint32_t i;
3344+
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
33363345
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
33373346

33383347
if (ctx->flags & IR_USE_FRAME_POINTER) {
3348+
fp = IR_REG_FRAME_POINTER;
33393349
offset = 0;
33403350
} else {
3351+
fp = IR_REG_STACK_POINTER;
33413352
offset = ctx->stack_frame_size + ctx->call_stack_size;
33423353
}
33433354
for (i = 0; i < IR_REG_NUM; i++) {
33443355
if (IR_REGSET_IN(used_preserved_regs, i)) {
33453356
if (i < IR_REG_FP_FIRST) {
3346-
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3347-
33483357
offset -= sizeof(void*);
3349-
| mov Ra(i), aword [Ra(fp)+offset]
33503358
} else {
3351-
ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3352-
33533359
offset -= sizeof(void*);
33543360
if (ctx->mflags & IR_X86_AVX) {
33553361
| vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
@@ -3361,7 +3367,40 @@ static void ir_emit_epilogue(ir_ctx *ctx)
33613367
}
33623368
}
33633369

3364-
if (ctx->flags & IR_USE_FRAME_POINTER) {
3370+
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
3371+
int i;
3372+
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
3373+
int offset;
3374+
3375+
if (ctx->flags & IR_USE_FRAME_POINTER) {
3376+
offset = 0;
3377+
} else {
3378+
offset = ctx->stack_frame_size + ctx->call_stack_size;
3379+
}
3380+
if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
3381+
int i;
3382+
ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
3383+
3384+
for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
3385+
if (IR_REGSET_IN(used_preserved_regs, i)) {
3386+
offset -= sizeof(void*);
3387+
}
3388+
}
3389+
}
3390+
if (ctx->flags & IR_USE_FRAME_POINTER) {
3391+
| lea Ra(IR_REG_RSP), [Ra(IR_REG_RBP)+offset]
3392+
} else if (offset) {
3393+
| add Ra(IR_REG_RSP), offset
3394+
}
3395+
for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
3396+
if (IR_REGSET_IN(used_preserved_regs, i)) {
3397+
| pop Ra(i)
3398+
}
3399+
}
3400+
if (ctx->flags & IR_USE_FRAME_POINTER) {
3401+
| pop Ra(IR_REG_RBP)
3402+
}
3403+
} else if (ctx->flags & IR_USE_FRAME_POINTER) {
33653404
| mov Ra(IR_REG_RSP), Ra(IR_REG_RBP)
33663405
| pop Ra(IR_REG_RBP)
33673406
} else if (ctx->stack_frame_size + ctx->call_stack_size) {
@@ -9742,6 +9781,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
97429781
if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
97439782
continue;
97449783
}
9784+
if (bb->flags & IR_BB_ALIGN_LOOP) {
9785+
| .align IR_LOOP_ALIGNMENT
9786+
}
97459787
|=>b:
97469788

97479789
i = bb->start;

0 commit comments

Comments
 (0)