@@ -15,6 +15,8 @@
 |.globals ir_lb
 |.section code, cold_code, rodata, jmp_table
 
+|.define IR_LOOP_ALIGNMENT, 16
+
 #ifdef IR_DEBUG
 typedef struct _ir_mem {uint64_t v;} ir_mem;
 
@@ -1402,6 +1404,7 @@ op2_const:
 		case IR_TRUNC:
 		case IR_BITCAST:
 		case IR_PROTO:
+		case IR_FP2FP:
 			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
 			break;
 		case IR_ABS_INT:
@@ -1771,7 +1774,7 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
 			} else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) ||
 					(ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_NEG_32BIT(op2_insn->val.i64))) {
lea:
-				if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) {
+				if (ctx->use_lists[insn->op1].count == 1) {
 					uint32_t rule = ctx->rules[insn->op1];
 
 					if (!rule) {
@@ -1814,14 +1817,14 @@ lea:
 				}
 			} else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) {
 				if (insn->op1 != insn->op2) {
-					if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) {
+					if (ctx->use_lists[insn->op1].count == 1) {
 						uint32_t rule =ctx->rules[insn->op1];
 						if (!rule) {
 							ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1);
 						}
 						if (rule == IR_LEA_OB) {
 							ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB;
-							if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+							if (ctx->use_lists[insn->op2].count == 1) {
 								rule = ctx->rules[insn->op2];
 								if (!rule) {
 									ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
@@ -1836,7 +1839,7 @@ lea:
 							return IR_LEA_OB_I;
 						} else if (rule == IR_LEA_SI) {
 							ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI;
-							if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+							if (ctx->use_lists[insn->op2].count == 1) {
 								rule = ctx->rules[insn->op2];
 								if (!rule) {
 									ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
@@ -1851,7 +1854,7 @@ lea:
 								return IR_LEA_SI_B;
 							}
 						}
-						if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+						if (ctx->use_lists[insn->op2].count == 1) {
 							uint32_t rule = ctx->rules[insn->op2];
 							if (!rule) {
 								ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
@@ -3227,43 +3230,48 @@ static void ir_emit_prologue(ir_ctx *ctx)
 {
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
+	int offset = ctx->stack_frame_size + ctx->call_stack_size;
 
 	if (ctx->flags & IR_USE_FRAME_POINTER) {
 		|	push Ra(IR_REG_RBP)
 		|	mov Ra(IR_REG_RBP), Ra(IR_REG_RSP)
 	}
+	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
+		int i;
+		ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
+
+		for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
+			if (IR_REGSET_IN(used_preserved_regs, i)) {
+				offset -= sizeof(void*);
+				|	push Ra(i)
+			}
+		}
+	}
 	if (ctx->stack_frame_size + ctx->call_stack_size) {
 		if (ctx->fixed_stack_red_zone) {
 			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
-		} else {
-			|	sub Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size)
+		} else if (offset) {
+			|	sub Ra(IR_REG_RSP), offset
 		}
 	}
-	if (ctx->used_preserved_regs) {
+	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
 		ir_reg fp;
-		int offset;
-		uint32_t i;
-		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
+		int i;
+		ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP);
 
 		if (ctx->flags & IR_USE_FRAME_POINTER) {
 			fp = IR_REG_FRAME_POINTER;
-			offset = 0;
+			offset -= ctx->stack_frame_size + ctx->call_stack_size;
 		} else {
 			fp = IR_REG_STACK_POINTER;
-			offset = ctx->stack_frame_size + ctx->call_stack_size;
 		}
-		for (i = 0; i < IR_REG_NUM; i++) {
+		for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
 			if (IR_REGSET_IN(used_preserved_regs, i)) {
-				if (i < IR_REG_FP_FIRST) {
-					offset -= sizeof(void*);
-					|	mov aword [Ra(fp)+offset], Ra(i)
+				offset -= sizeof(void*);
+				if (ctx->mflags & IR_X86_AVX) {
+					|	vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
 				} else {
-					offset -= sizeof(void*);
-					if (ctx->mflags & IR_X86_AVX) {
-						|	vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
-					} else {
-						|	movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
-					}
+					|	movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
 				}
 			}
 		}
@@ -3330,26 +3338,24 @@ static void ir_emit_epilogue(ir_ctx *ctx)
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 
-	if (ctx->used_preserved_regs) {
+	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
+		int i;
 		int offset;
-		uint32_t i;
+		ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
 		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
 
 		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			fp = IR_REG_FRAME_POINTER;
 			offset = 0;
 		} else {
+			fp = IR_REG_STACK_POINTER;
 			offset = ctx->stack_frame_size + ctx->call_stack_size;
 		}
 		for (i = 0; i < IR_REG_NUM; i++) {
 			if (IR_REGSET_IN(used_preserved_regs, i)) {
 				if (i < IR_REG_FP_FIRST) {
-					ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-
 					offset -= sizeof(void*);
-					|	mov Ra(i), aword [Ra(fp)+offset]
 				} else {
-					ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-
 					offset -= sizeof(void*);
 					if (ctx->mflags & IR_X86_AVX) {
 						|	vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
@@ -3361,7 +3367,40 @@ static void ir_emit_epilogue(ir_ctx *ctx)
 		}
 	}
 
-	if (ctx->flags & IR_USE_FRAME_POINTER) {
+	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
+		int i;
+		ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
+		int offset;
+
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			offset = 0;
+		} else {
+			offset = ctx->stack_frame_size + ctx->call_stack_size;
+		}
+		if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
+			int i;
+			ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
+
+			for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
+				if (IR_REGSET_IN(used_preserved_regs, i)) {
+					offset -= sizeof(void*);
+				}
+			}
+		}
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			|	lea Ra(IR_REG_RSP), [Ra(IR_REG_RBP)+offset]
+		} else if (offset) {
+			|	add Ra(IR_REG_RSP), offset
+		}
+		for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
+			if (IR_REGSET_IN(used_preserved_regs, i)) {
+				|	pop Ra(i)
+			}
+		}
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			|	pop Ra(IR_REG_RBP)
+		}
+	} else if (ctx->flags & IR_USE_FRAME_POINTER) {
 		|	mov Ra(IR_REG_RSP), Ra(IR_REG_RBP)
 		|	pop Ra(IR_REG_RBP)
 	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
@@ -9742,6 +9781,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
 			continue;
 		}
+		if (bb->flags & IR_BB_ALIGN_LOOP) {
+			|	.align IR_LOOP_ALIGNMENT
+		}
 		|=>b:
 
 		i = bb->start;