@@ -28,6 +28,23 @@ define i64 @concat_bswap32_unary_split(i64 %a0) {
  ret i64 %9
}

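+; Vector counterpart of concat_bswap32_unary_split: bswap the two i32
+; halves of each i64 element, then reassemble; per the CHECK lines this
+; should fold to a single @llvm.bswap.v2i64.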
+define <2 x i64> @concat_bswap32_unary_split_vector(<2 x i64> %a0) {
+; CHECK-LABEL: @concat_bswap32_unary_split_vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[A0:%.*]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  %3 = trunc <2 x i64> %a0 to <2 x i32>
+  %4 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  %5 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %3)
+  %6 = zext <2 x i32> %4 to <2 x i64>
+  %7 = zext <2 x i32> %5 to <2 x i64>
+  %8 = shl nuw <2 x i64> %7, <i64 32, i64 32>
+  %9 = or <2 x i64> %6, %8
+  ret <2 x i64> %9
+}
+
define i64 @concat_bswap32_unary_flip(i64 %a0) {
; CHECK-LABEL: @concat_bswap32_unary_flip(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32
@@ -48,6 +65,26 @@ define i64 @concat_bswap32_unary_flip(i64 %a0) {
  ret i64 %9
}

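+; Vector counterpart of concat_bswap32_unary_flip: the halves are swapped
+; before reassembly, so the expected fold is a rotate by 32 (lshr/shl/or)
+; followed by a single @llvm.bswap.v2i64.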
+define <2 x i64> @concat_bswap32_unary_flip_vector(<2 x i64> %a0) {
+; CHECK-LABEL: @concat_bswap32_unary_flip_vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A0:%.*]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[A0]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP3]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
+;
+  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  %3 = trunc <2 x i64> %a0 to <2 x i32>
+  %4 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  %5 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %3)
+  %6 = zext <2 x i32> %4 to <2 x i64>
+  %7 = zext <2 x i32> %5 to <2 x i64>
+  %8 = shl nuw <2 x i64> %6, <i64 32, i64 32>
+  %9 = or <2 x i64> %7, %8
+  ret <2 x i64> %9
+}
+
define i64 @concat_bswap32_binary(i32 %a0, i32 %a1) {
; CHECK-LABEL: @concat_bswap32_binary(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A1:%.*]] to i64
@@ -66,7 +103,26 @@ define i64 @concat_bswap32_binary(i32 %a0, i32 %a1) {
  ret i64 %6
}

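+; Vector counterpart of concat_bswap32_binary: two independent <2 x i32>
+; bswaps concatenated into a <2 x i64> should become one
+; @llvm.bswap.v2i64 of the reassembled value.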
+define <2 x i64> @concat_bswap32_binary_vector(<2 x i32> %a0, <2 x i32> %a1) {
+; CHECK-LABEL: @concat_bswap32_binary_vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP4]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
+;
+  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a0)
+  %2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a1)
+  %3 = zext <2 x i32> %1 to <2 x i64>
+  %4 = zext <2 x i32> %2 to <2 x i64>
+  %5 = shl nuw <2 x i64> %4, <i64 32, i64 32>
+  %6 = or <2 x i64> %3, %5
+  ret <2 x i64> %6
+}
+
declare i32 @llvm.bswap.i32(i32)
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)

; BITREVERSE

@@ -87,6 +143,23 @@ define i64 @concat_bitreverse32_unary_split(i64 %a0) {
  ret i64 %9
}

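+; Vector counterpart of concat_bitreverse32_unary_split: the split halves
+; are bitreversed and reassembled, which should fold to a single
+; @llvm.bitreverse.v2i64.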
+define <2 x i64> @concat_bitreverse32_unary_split_vector(<2 x i64> %a0) {
+; CHECK-LABEL: @concat_bitreverse32_unary_split_vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[A0:%.*]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  %3 = trunc <2 x i64> %a0 to <2 x i32>
+  %4 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %2)
+  %5 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %3)
+  %6 = zext <2 x i32> %4 to <2 x i64>
+  %7 = zext <2 x i32> %5 to <2 x i64>
+  %8 = shl nuw <2 x i64> %7, <i64 32, i64 32>
+  %9 = or <2 x i64> %6, %8
+  ret <2 x i64> %9
+}
+
define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
; CHECK-LABEL: @concat_bitreverse32_unary_flip(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32
@@ -107,6 +180,26 @@ define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
  ret i64 %9
}

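+; Vector counterpart of concat_bitreverse32_unary_flip: with the halves
+; swapped, the fold keeps the rotate by 32 (lshr/shl/or) and emits one
+; @llvm.bitreverse.v2i64.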
+define <2 x i64> @concat_bitreverse32_unary_flip_vector(<2 x i64> %a0) {
+; CHECK-LABEL: @concat_bitreverse32_unary_flip_vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A0:%.*]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[A0]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP3]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
+;
+  %1 = lshr <2 x i64> %a0, <i64 32, i64 32>
+  %2 = trunc <2 x i64> %1 to <2 x i32>
+  %3 = trunc <2 x i64> %a0 to <2 x i32>
+  %4 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %2)
+  %5 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %3)
+  %6 = zext <2 x i32> %4 to <2 x i64>
+  %7 = zext <2 x i32> %5 to <2 x i64>
+  %8 = shl nuw <2 x i64> %6, <i64 32, i64 32>
+  %9 = or <2 x i64> %7, %8
+  ret <2 x i64> %9
+}
+
define i64 @concat_bitreverse32_binary(i32 %a0, i32 %a1) {
; CHECK-LABEL: @concat_bitreverse32_binary(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A1:%.*]] to i64
@@ -125,4 +218,23 @@ define i64 @concat_bitreverse32_binary(i32 %a0, i32 %a1) {
  ret i64 %6
}

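+; Vector counterpart of concat_bitreverse32_binary: two independent
+; <2 x i32> bitreverses concatenated into a <2 x i64> should become one
+; @llvm.bitreverse.v2i64 of the reassembled value.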
+define <2 x i64> @concat_bitreverse32_binary_vector(<2 x i32> %a0, <2 x i32> %a1) {
+; CHECK-LABEL: @concat_bitreverse32_binary_vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP4]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
+;
+  %1 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a0)
+  %2 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a1)
+  %3 = zext <2 x i32> %1 to <2 x i64>
+  %4 = zext <2 x i32> %2 to <2 x i64>
+  %5 = shl nuw <2 x i64> %4, <i64 32, i64 32>
+  %6 = or <2 x i64> %3, %5
+  ret <2 x i64> %6
+}
+
declare i32 @llvm.bitreverse.i32(i32)
+declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)