Skip to content

Commit b7c8271

Browse files
authored
[DAG] getNode - convert scalar i1 arithmetic calls to bitwise instructions (#125486)
We already do this for vector vXi1 types; this patch removes the vector-only constraint so that the same folds apply to all bool types, including scalar i1.
1 parent 0f11f20 commit b7c8271

File tree

20 files changed

+109
-155
lines changed

20 files changed

+109
-155
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7297,15 +7297,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
72977297
// it's worth handling here.
72987298
if (N2CV && N2CV->isZero())
72997299
return N1;
7300-
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
7301-
VT.getVectorElementType() == MVT::i1)
7300+
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
7301+
VT.getScalarType() == MVT::i1)
73027302
return getNode(ISD::XOR, DL, VT, N1, N2);
73037303
break;
73047304
case ISD::MUL:
73057305
assert(VT.isInteger() && "This operator does not apply to FP types!");
73067306
assert(N1.getValueType() == N2.getValueType() &&
73077307
N1.getValueType() == VT && "Binary operator types must match!");
7308-
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
7308+
if (VT.getScalarType() == MVT::i1)
73097309
return getNode(ISD::AND, DL, VT, N1, N2);
73107310
if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
73117311
const APInt &MulImm = N1->getConstantOperandAPInt(0);
@@ -7326,7 +7326,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
73267326
assert(VT.isInteger() && "This operator does not apply to FP types!");
73277327
assert(N1.getValueType() == N2.getValueType() &&
73287328
N1.getValueType() == VT && "Binary operator types must match!");
7329-
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
7329+
if (VT.getScalarType() == MVT::i1) {
73307330
// fold (add_sat x, y) -> (or x, y) for bool types.
73317331
if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT)
73327332
return getNode(ISD::OR, DL, VT, N1, N2);
@@ -7359,23 +7359,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
73597359
assert(VT.isInteger() && "This operator does not apply to FP types!");
73607360
assert(N1.getValueType() == N2.getValueType() &&
73617361
N1.getValueType() == VT && "Binary operator types must match!");
7362-
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
7362+
if (VT.getScalarType() == MVT::i1)
73637363
return getNode(ISD::XOR, DL, VT, N1, N2);
73647364
break;
73657365
case ISD::SMIN:
73667366
case ISD::UMAX:
73677367
assert(VT.isInteger() && "This operator does not apply to FP types!");
73687368
assert(N1.getValueType() == N2.getValueType() &&
73697369
N1.getValueType() == VT && "Binary operator types must match!");
7370-
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
7370+
if (VT.getScalarType() == MVT::i1)
73717371
return getNode(ISD::OR, DL, VT, N1, N2);
73727372
break;
73737373
case ISD::SMAX:
73747374
case ISD::UMIN:
73757375
assert(VT.isInteger() && "This operator does not apply to FP types!");
73767376
assert(N1.getValueType() == N2.getValueType() &&
73777377
N1.getValueType() == VT && "Binary operator types must match!");
7378-
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
7378+
if (VT.getScalarType() == MVT::i1)
73797379
return getNode(ISD::AND, DL, VT, N1, N2);
73807380
break;
73817381
case ISD::FADD:
@@ -10399,12 +10399,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
1039910399
case ISD::VP_ADD:
1040010400
case ISD::VP_SUB:
1040110401
// If it is VP_ADD/VP_SUB mask operation then turn it to VP_XOR
10402-
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
10402+
if (VT.getScalarType() == MVT::i1)
1040310403
Opcode = ISD::VP_XOR;
1040410404
break;
1040510405
case ISD::VP_MUL:
1040610406
// If it is VP_MUL mask operation then turn it to VP_AND
10407-
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
10407+
if (VT.getScalarType() == MVT::i1)
1040810408
Opcode = ISD::VP_AND;
1040910409
break;
1041010410
case ISD::VP_REDUCE_MUL:
@@ -10509,9 +10509,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
1050910509
return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
1051010510
}
1051110511

10512-
if (VTList.VTs[0].isVector() &&
10513-
VTList.VTs[0].getVectorElementType() == MVT::i1 &&
10514-
VTList.VTs[1].getVectorElementType() == MVT::i1) {
10512+
if (VTList.VTs[0].getScalarType() == MVT::i1 &&
10513+
VTList.VTs[1].getScalarType() == MVT::i1) {
1051510514
SDValue F1 = getFreeze(N1);
1051610515
SDValue F2 = getFreeze(N2);
1051710516
// {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1 y) -> {xor(x,y),and(x,y)}

llvm/test/CodeGen/AMDGPU/add_i1.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
44

55
; GCN-LABEL: {{^}}add_var_var_i1:
6-
; GFX9: s_xor_b64
7-
; GFX10: s_xor_b32
6+
; GFX9: v_xor_b32_e32
7+
; GFX10: v_xor_b32_e32
88
define amdgpu_kernel void @add_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
99
%a = load volatile i1, ptr addrspace(1) %in0
1010
%b = load volatile i1, ptr addrspace(1) %in1
@@ -14,8 +14,8 @@ define amdgpu_kernel void @add_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1
1414
}
1515

1616
; GCN-LABEL: {{^}}add_var_imm_i1:
17-
; GFX9: s_not_b64
18-
; GFX10: s_not_b32
17+
; GFX9: s_xor_b64
18+
; GFX10: s_xor_b32
1919
define amdgpu_kernel void @add_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) {
2020
%a = load volatile i1, ptr addrspace(1) %in
2121
%add = add i1 %a, 1
@@ -25,8 +25,8 @@ define amdgpu_kernel void @add_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1
2525

2626
; GCN-LABEL: {{^}}add_i1_cf:
2727
; GCN: ; %endif
28-
; GFX9: s_not_b64
29-
; GFX10: s_not_b32
28+
; GFX9: s_xor_b64
29+
; GFX10: s_xor_b32
3030
define amdgpu_kernel void @add_i1_cf(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
3131
entry:
3232
%tid = call i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/mul.ll

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,8 +1459,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
14591459
; SI-NEXT: s_mov_b32 s3, 0xf000
14601460
; SI-NEXT: s_mov_b32 s2, -1
14611461
; SI-NEXT: s_waitcnt lgkmcnt(0)
1462-
; SI-NEXT: s_mul_i32 s6, s6, s7
1463-
; SI-NEXT: s_and_b32 s4, s6, 1
1462+
; SI-NEXT: s_and_b32 s4, s6, s7
1463+
; SI-NEXT: s_and_b32 s4, s4, 1
14641464
; SI-NEXT: v_mov_b32_e32 v0, s4
14651465
; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
14661466
; SI-NEXT: s_endpgm
@@ -1473,8 +1473,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
14731473
; VI-NEXT: s_mov_b32 s3, 0xf000
14741474
; VI-NEXT: s_mov_b32 s2, -1
14751475
; VI-NEXT: s_waitcnt lgkmcnt(0)
1476-
; VI-NEXT: s_mul_i32 s6, s6, s7
1477-
; VI-NEXT: s_and_b32 s4, s6, 1
1476+
; VI-NEXT: s_and_b32 s4, s6, s7
1477+
; VI-NEXT: s_and_b32 s4, s4, 1
14781478
; VI-NEXT: v_mov_b32_e32 v0, s4
14791479
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
14801480
; VI-NEXT: s_endpgm
@@ -1487,8 +1487,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
14871487
; GFX9-NEXT: s_mov_b32 s3, 0xf000
14881488
; GFX9-NEXT: s_mov_b32 s2, -1
14891489
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1490-
; GFX9-NEXT: s_mul_i32 s6, s6, s7
1491-
; GFX9-NEXT: s_and_b32 s4, s6, 1
1490+
; GFX9-NEXT: s_and_b32 s4, s6, s7
1491+
; GFX9-NEXT: s_and_b32 s4, s4, 1
14921492
; GFX9-NEXT: v_mov_b32_e32 v0, s4
14931493
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
14941494
; GFX9-NEXT: s_endpgm
@@ -1500,7 +1500,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15001500
; GFX10-NEXT: s_load_dword s3, s[4:5], 0x70
15011501
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
15021502
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1503-
; GFX10-NEXT: s_mul_i32 s2, s2, s3
1503+
; GFX10-NEXT: s_and_b32 s2, s2, s3
15041504
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
15051505
; GFX10-NEXT: s_and_b32 s2, s2, 1
15061506
; GFX10-NEXT: v_mov_b32_e32 v0, s2
@@ -1515,7 +1515,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15151515
; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x70
15161516
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
15171517
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1518-
; GFX11-NEXT: s_mul_i32 s2, s2, s3
1518+
; GFX11-NEXT: s_and_b32 s2, s2, s3
15191519
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
15201520
; GFX11-NEXT: s_and_b32 s2, s2, 1
15211521
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -1531,7 +1531,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15311531
; GFX12-NEXT: s_load_b32 s3, s[4:5], 0x70
15321532
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
15331533
; GFX12-NEXT: s_wait_kmcnt 0x0
1534-
; GFX12-NEXT: s_mul_i32 s2, s2, s3
1534+
; GFX12-NEXT: s_and_b32 s2, s2, s3
15351535
; GFX12-NEXT: s_mov_b32 s3, 0x31016000
15361536
; GFX12-NEXT: s_and_b32 s2, s2, 1
15371537
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -1555,7 +1555,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15551555
; EG-NEXT: MOV * T0.X, 0.0,
15561556
; EG-NEXT: ALU clause starting at 11:
15571557
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
1558-
; EG-NEXT: MULLO_INT * T0.X, T1.X, T0.X,
1558+
; EG-NEXT: AND_INT * T1.W, T1.X, T0.X,
15591559
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
15601560
; EG-NEXT: AND_INT T1.W, PS, 1,
15611561
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
@@ -1589,7 +1589,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
15891589
; SI-NEXT: s_mov_b32 s4, s0
15901590
; SI-NEXT: s_mov_b32 s5, s1
15911591
; SI-NEXT: s_waitcnt vmcnt(0)
1592-
; SI-NEXT: v_mul_lo_u32 v0, v0, v1
1592+
; SI-NEXT: v_and_b32_e32 v0, v0, v1
15931593
; SI-NEXT: v_and_b32_e32 v0, 1, v0
15941594
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
15951595
; SI-NEXT: s_endpgm
@@ -1609,7 +1609,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16091609
; VI-NEXT: s_mov_b32 s4, s0
16101610
; VI-NEXT: s_mov_b32 s5, s1
16111611
; VI-NEXT: s_waitcnt vmcnt(0)
1612-
; VI-NEXT: v_mul_lo_u32 v0, v0, v1
1612+
; VI-NEXT: v_and_b32_e32 v0, v0, v1
16131613
; VI-NEXT: v_and_b32_e32 v0, 1, v0
16141614
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
16151615
; VI-NEXT: s_endpgm
@@ -1629,7 +1629,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16291629
; GFX9-NEXT: s_mov_b32 s4, s0
16301630
; GFX9-NEXT: s_mov_b32 s5, s1
16311631
; GFX9-NEXT: s_waitcnt vmcnt(0)
1632-
; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
1632+
; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
16331633
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
16341634
; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0
16351635
; GFX9-NEXT: s_endpgm
@@ -1650,7 +1650,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16501650
; GFX10-NEXT: s_mov_b32 s4, s0
16511651
; GFX10-NEXT: s_mov_b32 s5, s1
16521652
; GFX10-NEXT: s_waitcnt vmcnt(0)
1653-
; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
1653+
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
16541654
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
16551655
; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0
16561656
; GFX10-NEXT: s_endpgm
@@ -1671,7 +1671,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16711671
; GFX11-NEXT: s_mov_b32 s4, s0
16721672
; GFX11-NEXT: s_mov_b32 s5, s1
16731673
; GFX11-NEXT: s_waitcnt vmcnt(0)
1674-
; GFX11-NEXT: v_mul_lo_u32 v0, v0, v1
1674+
; GFX11-NEXT: v_and_b32_e32 v0, v0, v1
16751675
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
16761676
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
16771677
; GFX11-NEXT: buffer_store_b8 v0, off, s[4:7], 0
@@ -1693,7 +1693,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16931693
; GFX12-NEXT: s_mov_b32 s4, s0
16941694
; GFX12-NEXT: s_mov_b32 s5, s1
16951695
; GFX12-NEXT: s_wait_loadcnt 0x0
1696-
; GFX12-NEXT: v_mul_lo_u32 v0, v0, v1
1696+
; GFX12-NEXT: v_and_b32_e32 v0, v0, v1
16971697
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
16981698
; GFX12-NEXT: v_and_b32_e32 v0, 1, v0
16991699
; GFX12-NEXT: buffer_store_b8 v0, off, s[4:7], null
@@ -1714,7 +1714,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
17141714
; EG-NEXT: MOV * T0.X, KC0[2].Z,
17151715
; EG-NEXT: ALU clause starting at 11:
17161716
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
1717-
; EG-NEXT: MULLO_INT * T0.X, T0.X, T1.X,
1717+
; EG-NEXT: AND_INT * T1.W, T0.X, T1.X,
17181718
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
17191719
; EG-NEXT: AND_INT T1.W, PS, 1,
17201720
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,

llvm/test/CodeGen/AMDGPU/sub_i1.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55

66
; GCN-LABEL: {{^}}sub_var_var_i1:
7-
; WAVE32: s_xor_b32
8-
; WAVE64: s_xor_b64
7+
; WAVE32: v_xor_b32_e32
8+
; WAVE64: v_xor_b32_e32
99
define amdgpu_kernel void @sub_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
1010
%a = load volatile i1, ptr addrspace(1) %in0
1111
%b = load volatile i1, ptr addrspace(1) %in1
@@ -15,8 +15,8 @@ define amdgpu_kernel void @sub_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1
1515
}
1616

1717
; GCN-LABEL: {{^}}sub_var_imm_i1:
18-
; WAVE32: s_not_b32
19-
; WAVE64: s_not_b64
18+
; WAVE32: s_xor_b32
19+
; WAVE64: s_xor_b64
2020
define amdgpu_kernel void @sub_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) {
2121
%a = load volatile i1, ptr addrspace(1) %in
2222
%sub = sub i1 %a, 1
@@ -26,8 +26,8 @@ define amdgpu_kernel void @sub_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1
2626

2727
; GCN-LABEL: {{^}}sub_i1_cf:
2828
; GCN: ; %endif
29-
; WAVE32: s_not_b32
30-
; WAVE64: s_not_b64
29+
; WAVE32: s_xor_b32
30+
; WAVE64: s_xor_b64
3131
define amdgpu_kernel void @sub_i1_cf(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
3232
entry:
3333
%tid = call i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/LoongArch/ir-instruction/add.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
define i1 @add_i1(i1 %x, i1 %y) {
88
; LA32-LABEL: add_i1:
99
; LA32: # %bb.0:
10-
; LA32-NEXT: add.w $a0, $a0, $a1
10+
; LA32-NEXT: xor $a0, $a0, $a1
1111
; LA32-NEXT: ret
1212
;
1313
; LA64-LABEL: add_i1:
1414
; LA64: # %bb.0:
15-
; LA64-NEXT: add.d $a0, $a0, $a1
15+
; LA64-NEXT: xor $a0, $a0, $a1
1616
; LA64-NEXT: ret
1717
%add = add i1 %x, %y
1818
ret i1 %add
@@ -97,12 +97,12 @@ define i64 @add_i64(i64 %x, i64 %y) {
9797
define i1 @add_i1_3(i1 %x) {
9898
; LA32-LABEL: add_i1_3:
9999
; LA32: # %bb.0:
100-
; LA32-NEXT: addi.w $a0, $a0, 1
100+
; LA32-NEXT: xori $a0, $a0, 1
101101
; LA32-NEXT: ret
102102
;
103103
; LA64-LABEL: add_i1_3:
104104
; LA64: # %bb.0:
105-
; LA64-NEXT: addi.d $a0, $a0, 1
105+
; LA64-NEXT: xori $a0, $a0, 1
106106
; LA64-NEXT: ret
107107
%add = add i1 %x, 3
108108
ret i1 %add

llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
define i1 @mul_i1(i1 %a, i1 %b) {
88
; LA32-LABEL: mul_i1:
99
; LA32: # %bb.0: # %entry
10-
; LA32-NEXT: mul.w $a0, $a0, $a1
10+
; LA32-NEXT: and $a0, $a0, $a1
1111
; LA32-NEXT: ret
1212
;
1313
; LA64-LABEL: mul_i1:
1414
; LA64: # %bb.0: # %entry
15-
; LA64-NEXT: mul.d $a0, $a0, $a1
15+
; LA64-NEXT: and $a0, $a0, $a1
1616
; LA64-NEXT: ret
1717
entry:
1818
%r = mul i1 %a, %b

llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
define i1 @sub_i1(i1 %x, i1 %y) {
88
; LA32-LABEL: sub_i1:
99
; LA32: # %bb.0:
10-
; LA32-NEXT: sub.w $a0, $a0, $a1
10+
; LA32-NEXT: xor $a0, $a0, $a1
1111
; LA32-NEXT: ret
1212
;
1313
; LA64-LABEL: sub_i1:
1414
; LA64: # %bb.0:
15-
; LA64-NEXT: sub.d $a0, $a0, $a1
15+
; LA64-NEXT: xor $a0, $a0, $a1
1616
; LA64-NEXT: ret
1717
%sub = sub i1 %x, %y
1818
ret i1 %sub

llvm/test/CodeGen/Mips/llvm-ir/add.ll

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,11 @@ define signext i1 @add_i1(i1 signext %a, i1 signext %b) {
3838
entry:
3939
; ALL-LABEL: add_i1:
4040

41-
; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5
42-
; NOT-R2-R6: andi $[[T0]], $[[T0]], 1
43-
; NOT-R2-R6: negu $2, $[[T0]]
41+
; NOT-R2-R6: xor $[[T0:[0-9]+]], $4, $5
4442

45-
; R2-R6: addu $[[T0:[0-9]+]], $4, $5
46-
; R2-R6: andi $[[T0]], $[[T0]], 1
47-
; R2-R6: negu $2, $[[T0]]
43+
; R2-R6: xor $[[T0:[0-9]+]], $4, $5
4844

49-
; MMR6: addu16 $[[T0:[0-9]+]], $4, $5
50-
; MMR6: andi16 $[[T0]], $[[T0]], 1
51-
; MMR6: li16 $[[T1:[0-9]+]], 0
52-
; MMR6: subu16 $[[T0]], $[[T1]], $[[T0]]
45+
; MMR6: xor $[[T0:[0-9]+]], $4, $5
5346

5447
%r = add i1 %a, %b
5548
ret i1 %r
@@ -368,18 +361,11 @@ define signext i128 @add_i128_4(i128 signext %a) {
368361

369362
define signext i1 @add_i1_3(i1 signext %a) {
370363
; ALL-LABEL: add_i1_3:
371-
; GP32: addiu $[[T0:[0-9]+]], $4, 1
372-
; GP32: andi $[[T0]], $[[T0]], 1
373-
; GP32: negu $2, $[[T0]]
374-
375-
; GP64: addiu $[[T0:[0-9]+]], $4, 1
376-
; GP64: andi $[[T0]], $[[T0]], 1
377-
; GP64: negu $2, $[[T0]]
378-
379-
; MMR6: addiur2 $[[T0:[0-9]+]], $4, 1
380-
; MMR6: andi16 $[[T0]], $[[T0]], 1
381-
; MMR6: li16 $[[T1:[0-9]+]], 0
382-
; MMR6: subu16 $2, $[[T1]], $[[T0]]
364+
; GP32: not $[[T0:[0-9]+]], $4
365+
366+
; GP64: not $[[T0:[0-9]+]], $4
367+
368+
; MMR6: not16 $[[T0:[0-9]+]], $4
383369

384370
%r = add i1 3, %a
385371
ret i1 %r

0 commit comments

Comments
 (0)