Skip to content

Commit fb07683

Browse files
authored
[NVPTX] Add baseline srl-shl-zext tests from #138290 (#139878)
1 parent 8c67d25 commit fb07683

File tree

1 file changed

+192
-0
lines changed

1 file changed

+192
-0
lines changed

llvm/test/CodeGen/NVPTX/shift-opt.ll

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
3+
4+
; Fold candidate: srl(or(x, shl(zext(y), c1)), c1) -> or(srl(x, c1), zext(y))
; Precondition: c1 <= leadingzeros(zext(y)). Here y is zero-extended from i32
; to i64 and c1 is 5, so the shl cannot push any set bit out of the value.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @test_or(i64 %x, i32 %y) {
; CHECK-LABEL: test_or(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_or_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_or_param_1];
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %y.wide = zext i32 %y to i64
  %y.hi = shl i64 %y.wide, 5
  %merged = or i64 %x, %y.hi
  %result = lshr i64 %merged, 5
  ret i64 %result
}
26+
27+
; Fold candidate: srl(xor(x, shl(zext(y), c1)), c1) -> xor(srl(x, c1), zext(y))
; Precondition: c1 <= leadingzeros(zext(y)). Here y is zero-extended from i32
; to i64 and c1 is 5, so the shl cannot push any set bit out of the value.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @test_xor(i64 %x, i32 %y) {
; CHECK-LABEL: test_xor(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xor_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_xor_param_1];
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
; CHECK-NEXT: xor.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %ext = zext i32 %y to i64
  %shl = shl i64 %ext, 5
  ; Named after the operation under test (was %or, copied from test_or).
  %xor = xor i64 %x, %shl
  %srl = lshr i64 %xor, 5
  ret i64 %srl
}
49+
50+
; Fold candidate: srl(and(x, shl(zext(y), c1)), c1) -> and(srl(x, c1), zext(y))
; Precondition: c1 <= leadingzeros(zext(y)). Here y is zero-extended from i32
; to i64 and c1 is 5, so the shl cannot push any set bit out of the value.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i64 @test_and(i64 %x, i32 %y) {
; CHECK-LABEL: test_and(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_and_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_and_param_1];
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
; CHECK-NEXT: and.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %ext = zext i32 %y to i64
  %shl = shl i64 %ext, 5
  ; Named after the operation under test (was %or, copied from test_or).
  %and = and i64 %x, %shl
  %srl = lshr i64 %and, 5
  ret i64 %srl
}
72+
73+
; Vector variant of the fold candidate:
;   srl(or(x, shl(zext(y), c1)), c1) -> or(srl(x, c1), zext(y))
; Precondition: c1 <= leadingzeros(zext(y)). Each lane of y is zero-extended
; from i8 to i16 and the splat shift amount is 5.
; x, y - vectors
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
; CHECK-LABEL: test_vec(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<9>;
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_vec_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_vec_param_1];
; CHECK-NEXT: and.b32 %r3, %r2, 16711935;
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3;
; CHECK-NEXT: shl.b16 %rs3, %rs2, 5;
; CHECK-NEXT: shl.b16 %rs4, %rs1, 5;
; CHECK-NEXT: mov.b32 %r4, {%rs4, %rs3};
; CHECK-NEXT: or.b32 %r5, %r1, %r4;
; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r5;
; CHECK-NEXT: shr.u16 %rs7, %rs6, 5;
; CHECK-NEXT: shr.u16 %rs8, %rs5, 5;
; CHECK-NEXT: mov.b32 %r6, {%rs8, %rs7};
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %y.wide = zext <2 x i8> %y to <2 x i16>
  %y.hi = shl <2 x i16> %y.wide, splat(i16 5)
  %merged = or <2 x i16> %x, %y.hi
  %result = lshr <2 x i16> %merged, splat(i16 5)
  ret <2 x i16> %result
}
103+
104+
; Negative test. Must NOT fold:
;   srl(or(x, shl(zext(y), c1)), c1) -> or(srl(x, c1), zext(y))
; Reason: c1 > leadingzeros(zext(y)). The zext from i32 to i64 leaves 32 zero
; high bits, but the shift amount is 33, so the shl can drop a set bit of y.
define i64 @test_negative_c(i64 %x, i32 %y) {
; CHECK-LABEL: test_negative_c(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_negative_c_param_0];
; CHECK-NEXT: ld.param.b32 %rd2, [test_negative_c_param_1];
; CHECK-NEXT: shl.b64 %rd3, %rd2, 33;
; CHECK-NEXT: or.b64 %rd4, %rd1, %rd3;
; CHECK-NEXT: shr.u64 %rd5, %rd4, 33;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd5;
; CHECK-NEXT: ret;
  %y.wide = zext i32 %y to i64
  %y.hi = shl i64 %y.wide, 33
  %merged = or i64 %x, %y.hi
  %result = lshr i64 %merged, 33
  ret i64 %result
}
125+
126+
; External function (declaration only). The negative "multiple usage" tests
; call it to give an intermediate value a second user.
declare void @use(i64)
127+
128+
; Negative test. Must NOT fold:
;   srl(or(x, shl(zext(y), c1)), c1) -> or(srl(x, c1), zext(y))
; Reason: multiple usage of "or" - its result feeds both the lshr and the
; call to @use, so the unshifted value has to stay available.
define i64 @test_negative_use_lop(i64 %x, i32 %y) {
; CHECK-LABEL: test_negative_use_lop(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_negative_use_lop_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_negative_use_lop_param_1];
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: call.uni
; CHECK-NEXT: use,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %y.wide = zext i32 %y to i64
  %y.hi = shl i64 %y.wide, 5
  %merged = or i64 %x, %y.hi
  %result = lshr i64 %merged, 5
  ; Second user of the logic-op result.
  call void @use(i64 %merged)
  ret i64 %result
}
160+
161+
; Negative test. Must NOT fold:
;   srl(or(x, shl(zext(y), c1)), c1) -> or(srl(x, c1), zext(y))
; Reason: multiple usage of "shl" - its result feeds both the or and the
; call to @use, so the shifted value has to stay available.
define i64 @test_negative_use_shl(i64 %x, i32 %y) {
; CHECK-LABEL: test_negative_use_shl(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_negative_use_shl_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_negative_use_shl_param_1];
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni
; CHECK-NEXT: use,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
  %y.wide = zext i32 %y to i64
  %y.hi = shl i64 %y.wide, 5
  %merged = or i64 %x, %y.hi
  %result = lshr i64 %merged, 5
  ; Second user of the shl result.
  call void @use(i64 %y.hi)
  ret i64 %result
}

0 commit comments

Comments
 (0)