@@ -94,6 +94,13 @@ static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size)
94
94
return 0;
95
95
}
96
96
97
/* Check whether `offset` fits the scaled signed 7-bit immediate (imm7) used
 * by AArch64 LDP/STP addressing modes.
 *
 * imm7 is scaled by the access size: encodable offsets are n * type_size for
 * n in [-64, +63]. So the valid inclusive range is
 * [-64 * type_size, +63 * type_size], and the offset must be a multiple of
 * type_size.
 *
 * NOTE(review): the `(uintptr_t)` cast makes the `%` operate on the two's
 * complement bit pattern for negative offsets; this is only correct when
 * type_size is a power of two (callers pass 8) — confirm if other sizes are
 * ever used.
 *
 * Fix vs. original: used `<` on the upper bound, wrongly rejecting the valid
 * maximum offset 63 * type_size (e.g. +504 for 8-byte pairs) and forcing the
 * slower non-LDP/STP fallback for that one encodable value.
 */
static bool aarch64_may_encode_imm7_addr_offset(const int64_t offset, uint32_t type_size)
{
	return (uintptr_t)(offset) % type_size == 0
		&& offset <= 63 * (int32_t)type_size
		&& offset >= -64 * (int32_t)type_size;
}
103
+
97
104
static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size)
98
105
{
99
106
return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size;
@@ -352,7 +359,20 @@ int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constrain
352
359
constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
353
360
n++;
354
361
}
355
- if (rule == IR_SHIFT && insn->op == IR_ROL) {
362
+ if (rule == IR_SHIFT_CONST
363
+ && (insn->op == IR_ROL || insn->op == IR_ROR)
364
+ && ir_type_size[insn->type] < 4) {
365
+ constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
366
+ n++;
367
+ } else if (rule == IR_SHIFT
368
+ && (insn->op == IR_ROL || insn->op == IR_ROR)
369
+ && ir_type_size[insn->type] < 4) {
370
+ if (insn->op == IR_ROL) {
371
+ flags |= IR_DEF_CONFLICTS_WITH_INPUT_REGS;
372
+ }
373
+ constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
374
+ n++;
375
+ } else if (rule == IR_SHIFT && insn->op == IR_ROL) {
356
376
constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
357
377
n++;
358
378
}
@@ -1341,9 +1361,16 @@ static void ir_emit_prologue(ir_ctx *ctx)
1341
1361
{
1342
1362
ir_backend_data *data = ctx->data;
1343
1363
dasm_State **Dst = &data->dasm_state;
1364
+ int offset;
1344
1365
1345
1366
if (ctx->flags & IR_USE_FRAME_POINTER) {
1346
- | stp x29, x30, [sp, # (-(ctx->stack_frame_size+16))]!
1367
+ offset = -(ctx->stack_frame_size+16);
1368
+ if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1369
+ | stp x29, x30, [sp, #offset]!
1370
+ } else {
1371
+ | sub sp, sp, #(ctx->stack_frame_size+16)
1372
+ | stp x29, x30, [sp]
1373
+ }
1347
1374
| mov x29, sp
1348
1375
if (ctx->call_stack_size) {
1349
1376
| sub sp, sp, #(ctx->call_stack_size)
@@ -1357,7 +1384,6 @@ static void ir_emit_prologue(ir_ctx *ctx)
1357
1384
}
1358
1385
if (ctx->used_preserved_regs) {
1359
1386
ir_reg fp;
1360
- int offset;
1361
1387
uint32_t i;
1362
1388
ir_reg prev = IR_REG_NONE;
1363
1389
ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
@@ -1375,7 +1401,13 @@ static void ir_emit_prologue(ir_ctx *ctx)
1375
1401
prev = i;
1376
1402
} else if (i < IR_REG_FP_FIRST) {
1377
1403
offset -= sizeof(void*) * 2;
1378
- | stp Rx(prev), Rx(i), [Rx(fp), #offset]
1404
+ if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1405
+ | stp Rx(prev), Rx(i), [Rx(fp), #offset]
1406
+ } else {
1407
+ IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1408
+ | str Rx(prev), [Rx(fp), #offset]
1409
+ | str Rx(i), [Rx(fp), #(offset+8)]
1410
+ }
1379
1411
prev = IR_REG_NONE;
1380
1412
} else {
1381
1413
if (prev < IR_REG_FP_FIRST) {
@@ -1385,7 +1417,13 @@ static void ir_emit_prologue(ir_ctx *ctx)
1385
1417
| str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1386
1418
} else {
1387
1419
offset -= sizeof(void*) * 2;
1388
- | stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1420
+ if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1421
+ | stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1422
+ } else {
1423
+ IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1424
+ | str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
1425
+ | str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
1426
+ }
1389
1427
}
1390
1428
prev = IR_REG_NONE;
1391
1429
}
@@ -1425,7 +1463,13 @@ static void ir_emit_prologue(ir_ctx *ctx)
1425
1463
offset += sizeof(void*) * ctx->gp_reg_params;
1426
1464
for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
1427
1465
if (prev != IR_REG_NONE) {
1428
- | stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
1466
+ if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1467
+ | stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
1468
+ } else {
1469
+ IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1470
+ | str Rx(prev), [Rx(fp), #offset]
1471
+ | str Rx(int_reg_params[i]), [Rx(fp), #(offset+8)]
1472
+ }
1429
1473
prev = IR_REG_NONE;
1430
1474
offset += sizeof(void*) * 2;
1431
1475
} else {
@@ -1473,7 +1517,13 @@ static void ir_emit_epilogue(ir_ctx *ctx)
1473
1517
prev = i;
1474
1518
} else if (i < IR_REG_FP_FIRST) {
1475
1519
offset -= sizeof(void*) * 2;
1476
- | ldp Rx(prev), Rx(i), [Rx(fp), #offset]
1520
+ if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1521
+ | ldp Rx(prev), Rx(i), [Rx(fp), #offset]
1522
+ } else {
1523
+ IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1524
+ | ldr Rx(prev), [Rx(fp), #offset]
1525
+ | ldr Rx(i), [Rx(fp), #(offset+8)]
1526
+ }
1477
1527
prev = IR_REG_NONE;
1478
1528
} else {
1479
1529
if (prev < IR_REG_FP_FIRST) {
@@ -1483,7 +1533,13 @@ static void ir_emit_epilogue(ir_ctx *ctx)
1483
1533
| ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1484
1534
} else {
1485
1535
offset -= sizeof(void*) * 2;
1486
- | ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1536
+ if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1537
+ | ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1538
+ } else {
1539
+ IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1540
+ | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
1541
+ | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
1542
+ }
1487
1543
}
1488
1544
prev = IR_REG_NONE;
1489
1545
}
@@ -1504,7 +1560,12 @@ static void ir_emit_epilogue(ir_ctx *ctx)
1504
1560
if (ctx->call_stack_size || (ctx->flags2 & IR_HAS_ALLOCA)) {
1505
1561
| mov sp, x29
1506
1562
}
1507
- | ldp x29, x30, [sp], # (ctx->stack_frame_size+16)
1563
+ if (aarch64_may_encode_imm7_addr_offset(ctx->stack_frame_size+16, 8)) {
1564
+ | ldp x29, x30, [sp], #(ctx->stack_frame_size+16)
1565
+ } else {
1566
+ | ldp x29, x30, [sp]
1567
+ | add sp, sp, #(ctx->stack_frame_size+16)
1568
+ }
1508
1569
} else if (ctx->stack_frame_size + ctx->call_stack_size) {
1509
1570
if (ctx->fixed_stack_red_zone) {
1510
1571
IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
@@ -1922,18 +1983,55 @@ static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1922
1983
default:
1923
1984
IR_ASSERT(0);
1924
1985
case IR_SHL:
1925
- | ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg
1986
+ if (ir_type_size[type] == 1) {
1987
+ | and Rw(def_reg), Rw(op1_reg), #0xff
1988
+ | lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
1989
+ } else if (ir_type_size[type] == 2) {
1990
+ | and Rw(def_reg), Rw(op1_reg), #0xffff
1991
+ | lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
1992
+ } else {
1993
+ | ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg
1994
+ }
1926
1995
break;
1927
1996
case IR_SHR:
1928
- | ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg
1997
+ if (ir_type_size[type] == 1) {
1998
+ | and Rw(def_reg), Rw(op1_reg), #0xff
1999
+ | lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2000
+ } else if (ir_type_size[type] == 2) {
2001
+ | and Rw(def_reg), Rw(op1_reg), #0xffff
2002
+ | lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2003
+ } else {
2004
+ | ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg
2005
+ }
1929
2006
break;
1930
2007
case IR_SAR:
1931
- | ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg
2008
+ if (ir_type_size[type] == 1) {
2009
+ | sxtb Rw(def_reg), Rw(op1_reg)
2010
+ | asr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2011
+ } else if (ir_type_size[type] == 2) {
2012
+ | sxth Rw(def_reg), Rw(op1_reg)
2013
+ | asr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2014
+ } else {
2015
+ | ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg
2016
+ }
1932
2017
break;
1933
2018
case IR_ROL:
1934
2019
tmp_reg = ctx->regs[def][3];
1935
2020
IR_ASSERT(tmp_reg != IR_REG_NONE);
1936
- if (ir_type_size[type] == 8) {
2021
+ if (ir_type_size[type] == 1) {
2022
+ | and Rw(def_reg), Rw(op1_reg), #0xff
2023
+ | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #8
2024
+ | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16
2025
+ | neg Rw(tmp_reg), Rw(op2_reg)
2026
+ | ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg)
2027
+ | and Rw(def_reg), Rw(def_reg), #0xff
2028
+ } else if (ir_type_size[type] == 2) {
2029
+ | and Rw(def_reg), Rw(op1_reg), #0xffff
2030
+ | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16
2031
+ | neg Rw(tmp_reg), Rw(op2_reg)
2032
+ | ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg)
2033
+ | and Rw(def_reg), Rw(def_reg), #0xffff
2034
+ } else if (ir_type_size[type] == 8) {
1937
2035
| neg Rx(tmp_reg), Rx(op2_reg)
1938
2036
| ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg)
1939
2037
} else {
@@ -1942,7 +2040,24 @@ static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1942
2040
}
1943
2041
break;
1944
2042
case IR_ROR:
1945
- | ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg
2043
+ if (ir_type_size[type] == 1) {
2044
+ tmp_reg = ctx->regs[def][3];
2045
+ IR_ASSERT(tmp_reg != IR_REG_NONE);
2046
+ | and Rw(tmp_reg), Rw(op1_reg), #0xff
2047
+ | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #8
2048
+ | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16
2049
+ | ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg)
2050
+ | and Rw(def_reg), Rw(def_reg), #0xff
2051
+ } else if (ir_type_size[type] == 2) {
2052
+ tmp_reg = ctx->regs[def][3];
2053
+ IR_ASSERT(tmp_reg != IR_REG_NONE);
2054
+ | and Rw(tmp_reg), Rw(op1_reg), #0xffff
2055
+ | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16
2056
+ | ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg)
2057
+ | and Rw(def_reg), Rw(def_reg), #0xffff
2058
+ } else {
2059
+ | ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg
2060
+ }
1946
2061
break;
1947
2062
}
1948
2063
if (IR_REG_SPILLED(ctx->regs[def][0])) {
@@ -1959,6 +2074,7 @@ static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1959
2074
ir_ref op1 = insn->op1;
1960
2075
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
1961
2076
ir_reg op1_reg = ctx->regs[def][1];
2077
+ ir_reg tmp_reg;
1962
2078
1963
2079
IR_ASSERT(IR_IS_CONST_REF(insn->op2));
1964
2080
IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
@@ -1972,16 +2088,42 @@ static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1972
2088
default:
1973
2089
IR_ASSERT(0);
1974
2090
case IR_SHL:
1975
- | ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
2091
+ if (ir_type_size[type] == 1) {
2092
+ | ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2093
+ } else if (ir_type_size[type] == 2) {
2094
+ | ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2095
+ } else {
2096
+ | ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
2097
+ }
1976
2098
break;
1977
2099
case IR_SHR:
1978
- | ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
2100
+ if (ir_type_size[type] == 1) {
2101
+ | ubfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2102
+ } else if (ir_type_size[type] == 2) {
2103
+ | ubfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2104
+ } else {
2105
+ | ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
2106
+ }
1979
2107
break;
1980
2108
case IR_SAR:
1981
- | ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift
2109
+ if (ir_type_size[type] == 1) {
2110
+ | sbfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2111
+ } else if (ir_type_size[type] == 2) {
2112
+ | sbfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2113
+ } else {
2114
+ | ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift
2115
+ }
1982
2116
break;
1983
2117
case IR_ROL:
1984
- if (ir_type_size[type] == 8) {
2118
+ if (ir_type_size[type] == 1) {
2119
+ tmp_reg = ctx->regs[def][3];
2120
+ | ubfx Rw(tmp_reg), Rw(op1_reg), #(8-shift), #shift
2121
+ | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift
2122
+ } else if (ir_type_size[type] == 2) {
2123
+ tmp_reg = ctx->regs[def][3];
2124
+ | ubfx Rw(tmp_reg), Rw(op1_reg), #(16-shift), #shift
2125
+ | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift
2126
+ } else if (ir_type_size[type] == 8) {
1985
2127
shift = (64 - shift) % 64;
1986
2128
| ror Rx(def_reg), Rx(op1_reg), #shift
1987
2129
} else {
@@ -1990,7 +2132,17 @@ static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1990
2132
}
1991
2133
break;
1992
2134
case IR_ROR:
1993
- | ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift
2135
+ if (ir_type_size[type] == 1) {
2136
+ tmp_reg = ctx->regs[def][3];
2137
+ | ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(8-shift)
2138
+ | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(8-shift)
2139
+ } else if (ir_type_size[type] == 2) {
2140
+ tmp_reg = ctx->regs[def][3];
2141
+ | ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(16-shift)
2142
+ | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(16-shift)
2143
+ } else {
2144
+ | ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift
2145
+ }
1994
2146
break;
1995
2147
}
1996
2148
if (IR_REG_SPILLED(ctx->regs[def][0])) {
@@ -3653,7 +3805,7 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3653
3805
3654
3806
IR_ASSERT(IR_IS_TYPE_INT(val->type));
3655
3807
IR_ASSERT(!IR_IS_SYM_CONST(val->op));
3656
- IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0);
3808
+ IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
3657
3809
3658
3810
if (ctx->flags2 & IR_HAS_CALLS) {
3659
3811
/* Stack must be 16 byte aligned */
@@ -4971,7 +5123,7 @@ static void ir_emit_load_params(ir_ctx *ctx)
4971
5123
if (ctx->flags & IR_USE_FRAME_POINTER) {
4972
5124
stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */
4973
5125
} else {
4974
- stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */
5126
+ stack_offset = ctx->stack_frame_size + ctx->call_stack_size;
4975
5127
}
4976
5128
n = use_list->count;
4977
5129
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
@@ -5079,8 +5231,7 @@ static void ir_fix_param_spills(ir_ctx *ctx)
5079
5231
/* skip old frame pointer and return address */
5080
5232
stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
5081
5233
} else {
5082
- /* skip return address */
5083
- stack_offset = sizeof(void*) + ctx->stack_frame_size;
5234
+ stack_offset = ctx->stack_frame_size;
5084
5235
}
5085
5236
n = use_list->count;
5086
5237
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
0 commit comments