@@ -80,29 +80,75 @@ entry:
80
80
ret <8 x half > %0
81
81
}
82
82
83
- define dso_local half @t_vfmah_lane_f16 (half %a , half %b , <4 x half > %c , i32 %lane ) {
84
- ; CHECK-LABEL: t_vfmah_lane_f16 :
83
+ define dso_local half @t_vfmah_lane_f16_0 (half %a , half %b , <4 x half > %c , i32 %lane ) { ; Scalar fp16 fma whose second multiplicand is lane 0 of a 64-bit vector; the expected output is the scalar fmadd form. %lane is not referenced by the body.
84
+ ; CHECK-LABEL: t_vfmah_lane_f16_0 :
85
85
; CHECK: // %bb.0: // %entry
86
86
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
87
- ; CHECK-NEXT: fmla h0, h1, v2.h[0]
87
+ ; CHECK-NEXT: fmadd h0, h1, h2, h0
88
88
; CHECK-NEXT: ret
89
89
entry:
90
90
%extract = extractelement <4 x half > %c , i32 0 ; element 0 of %c
91
91
%0 = tail call half @llvm.fma.f16 (half %b , half %extract , half %a ) ; %b * %extract + %a
92
92
ret half %0
93
93
}
94
94
95
- define dso_local half @t_vfmah_laneq_f16 (half %a , half %b , <8 x half > %c , i32 %lane ) {
96
- ; CHECK-LABEL: t_vfmah_laneq_f16 :
95
+ define dso_local half @t_vfmah_lane_f16_0_swap (half %a , half %b , <4 x half > %c , i32 %lane ) { ; Same as the lane-0 case for a 64-bit vector, but the extracted element is the first multiplicand; the expected fmadd has its multiplicands in the swapped order. %lane is not referenced by the body.
96
+ ; CHECK-LABEL: t_vfmah_lane_f16_0_swap :
97
97
; CHECK: // %bb.0: // %entry
98
- ; CHECK-NEXT: fmla h0, h1, v2.h[0]
98
+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
99
+ ; CHECK-NEXT: fmadd h0, h2, h1, h0
100
+ ; CHECK-NEXT: ret
101
+ entry:
102
+ %extract = extractelement <4 x half > %c , i32 0 ; element 0 of %c
103
+ %0 = tail call half @llvm.fma.f16 (half %extract , half %b , half %a ) ; %extract * %b + %a (multiplicands swapped)
104
+ ret half %0
105
+ }
106
+
107
+ define dso_local half @t_vfmah_lane_f16_3 (half %a , half %b , <4 x half > %c , i32 %lane ) { ; Scalar fp16 fma whose second multiplicand is lane 3 (the last lane) of a 64-bit vector; the expected output is the by-element fmla form. %lane is not referenced by the body.
108
+ ; CHECK-LABEL: t_vfmah_lane_f16_3:
109
+ ; CHECK: // %bb.0: // %entry
110
+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
111
+ ; CHECK-NEXT: fmla h0, h1, v2.h[3]
112
+ ; CHECK-NEXT: ret
113
+ entry:
114
+ %extract = extractelement <4 x half > %c , i32 3 ; element 3 of %c
115
+ %0 = tail call half @llvm.fma.f16 (half %b , half %extract , half %a ) ; %b * %extract + %a
116
+ ret half %0
117
+ }
118
+
119
+ define dso_local half @t_vfmah_laneq_f16_0 (half %a , half %b , <8 x half > %c , i32 %lane ) { ; Scalar fp16 fma whose second multiplicand is lane 0 of a 128-bit vector; the expected output is the scalar fmadd form. %lane is not referenced by the body.
120
+ ; CHECK-LABEL: t_vfmah_laneq_f16_0:
121
+ ; CHECK: // %bb.0: // %entry
122
+ ; CHECK-NEXT: fmadd h0, h1, h2, h0
99
123
; CHECK-NEXT: ret
100
124
entry:
101
125
%extract = extractelement <8 x half > %c , i32 0 ; element 0 of %c
102
126
%0 = tail call half @llvm.fma.f16 (half %b , half %extract , half %a ) ; %b * %extract + %a
103
127
ret half %0
104
128
}
105
129
130
+ define dso_local half @t_vfmah_laneq_f16_0_swap (half %a , half %b , <8 x half > %c , i32 %lane ) { ; Lane-0 case for a 128-bit vector with the extracted element as the first multiplicand; the expected fmadd has its multiplicands in the swapped order. %lane is not referenced by the body.
131
+ ; CHECK-LABEL: t_vfmah_laneq_f16_0_swap:
132
+ ; CHECK: // %bb.0: // %entry
133
+ ; CHECK-NEXT: fmadd h0, h2, h1, h0
134
+ ; CHECK-NEXT: ret
135
+ entry:
136
+ %extract = extractelement <8 x half > %c , i32 0 ; element 0 of %c
137
+ %0 = tail call half @llvm.fma.f16 (half %extract , half %b , half %a ) ; %extract * %b + %a (multiplicands swapped)
138
+ ret half %0
139
+ }
140
+
141
+ define dso_local half @t_vfmah_laneq_f16_7 (half %a , half %b , <8 x half > %c , i32 %lane ) { ; Scalar fp16 fma whose second multiplicand is lane 7 (the last lane) of a 128-bit vector; the expected output is the by-element fmla form. %lane is not referenced by the body.
142
+ ; CHECK-LABEL: t_vfmah_laneq_f16_7:
143
+ ; CHECK: // %bb.0: // %entry
144
+ ; CHECK-NEXT: fmla h0, h1, v2.h[7]
145
+ ; CHECK-NEXT: ret
146
+ entry:
147
+ %extract = extractelement <8 x half > %c , i32 7 ; element 7 of %c
148
+ %0 = tail call half @llvm.fma.f16 (half %b , half %extract , half %a ) ; %b * %extract + %a
149
+ ret half %0
150
+ }
151
+
106
152
define dso_local <4 x half > @t_vfms_lane_f16 (<4 x half > %a , <4 x half > %b , <4 x half > %c , i32 %lane ) {
107
153
; CHECK-LABEL: t_vfms_lane_f16:
108
154
; CHECK: // %bb.0: // %entry
@@ -181,23 +227,49 @@ entry:
181
227
ret <8 x half > %0
182
228
}
183
229
184
- define dso_local half @t_vfmsh_lane_f16 (half %a , half %b , <4 x half > %c , i32 %lane ) {
185
- ; CHECK-LABEL: t_vfmsh_lane_f16:
230
+ define dso_local half @t_vfmsh_lane_f16_0 (half %a , half %b , <4 x half > %c , i32 %lane ) { ; Scalar fp16 fused multiply-subtract: negated %b times lane 0 of a 64-bit vector, plus %a; the expected output is the scalar fmsub form. %lane is not referenced by the body.
231
+ ; CHECK-LABEL: t_vfmsh_lane_f16_0:
232
+ ; CHECK: // %bb.0: // %entry
233
+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
234
+ ; CHECK-NEXT: fmsub h0, h2, h1, h0
235
+ ; CHECK-NEXT: ret
236
+ entry:
237
+ %0 = fsub half 0xH8000, %b ; 0xH8000 is -0.0 in half precision, so this negates %b
238
+ %extract = extractelement <4 x half > %c , i32 0 ; element 0 of %c
239
+ %1 = tail call half @llvm.fma.f16 (half %0 , half %extract , half %a ) ; (-%b) * %extract + %a
240
+ ret half %1
241
+ }
242
+
243
+ define dso_local half @t_vfmsh_lane_f16_0_swap (half %a , half %b , <4 x half > %c , i32 %lane ) { ; Lane-0 fused multiply-subtract for a 64-bit vector with the extracted element as the first multiplicand and the negated %b as the second; the expected output is the scalar fmsub form. %lane is not referenced by the body.
244
+ ; CHECK-LABEL: t_vfmsh_lane_f16_0_swap:
186
245
; CHECK: // %bb.0: // %entry
187
246
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
188
- ; CHECK-NEXT: fmls h0, h1, v2.h[0]
247
+ ; CHECK-NEXT: fmsub h0, h2, h1, h0
189
248
; CHECK-NEXT: ret
190
249
entry:
191
250
%0 = fsub half 0xH8000, %b ; 0xH8000 is -0.0 in half precision, so this negates %b
192
251
%extract = extractelement <4 x half > %c , i32 0 ; element 0 of %c
252
+ %1 = tail call half @llvm.fma.f16 (half %extract , half %0 , half %a ) ; %extract * (-%b) + %a (multiplicands swapped)
253
+ ret half %1
254
+ }
255
+
256
+ define dso_local half @t_vfmsh_lane_f16_3 (half %a , half %b , <4 x half > %c , i32 %lane ) { ; Scalar fp16 fused multiply-subtract using lane 3 (the last lane) of a 64-bit vector; the expected output is the by-element fmls form. %lane is not referenced by the body.
257
+ ; CHECK-LABEL: t_vfmsh_lane_f16_3:
258
+ ; CHECK: // %bb.0: // %entry
259
+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
260
+ ; CHECK-NEXT: fmls h0, h1, v2.h[3]
261
+ ; CHECK-NEXT: ret
262
+ entry:
263
+ %0 = fsub half 0xH8000, %b ; 0xH8000 is -0.0 in half precision, so this negates %b
264
+ %extract = extractelement <4 x half > %c , i32 3 ; element 3 of %c
193
265
%1 = tail call half @llvm.fma.f16 (half %0 , half %extract , half %a ) ; (-%b) * %extract + %a
194
266
ret half %1
195
267
}
196
268
197
- define dso_local half @t_vfmsh_laneq_f16 (half %a , half %b , <8 x half > %c , i32 %lane ) {
198
- ; CHECK-LABEL: t_vfmsh_laneq_f16 :
269
+ define dso_local half @t_vfmsh_laneq_f16_0 (half %a , half %b , <8 x half > %c , i32 %lane ) {
270
+ ; CHECK-LABEL: t_vfmsh_laneq_f16_0 :
199
271
; CHECK: // %bb.0: // %entry
200
- ; CHECK-NEXT: fmls h0, h1, v2.h[0]
272
+ ; CHECK-NEXT: fmsub h0, h2, h1, h0
201
273
; CHECK-NEXT: ret
202
274
entry:
203
275
%0 = fsub half 0xH8000, %b
@@ -206,6 +278,30 @@ entry:
206
278
ret half %1
207
279
}
208
280
281
+ define dso_local half @t_vfmsh_laneq_f16_0_swap (half %a , half %b , <8 x half > %c , i32 %lane ) { ; Lane-0 fused multiply-subtract for a 128-bit vector with the extracted element as the first multiplicand and the negated %b as the second; the expected output is the scalar fmsub form. %lane is not referenced by the body.
282
+ ; CHECK-LABEL: t_vfmsh_laneq_f16_0_swap:
283
+ ; CHECK: // %bb.0: // %entry
284
+ ; CHECK-NEXT: fmsub h0, h2, h1, h0
285
+ ; CHECK-NEXT: ret
286
+ entry:
287
+ %0 = fsub half 0xH8000, %b ; 0xH8000 is -0.0 in half precision, so this negates %b
288
+ %extract = extractelement <8 x half > %c , i32 0 ; element 0 of %c
289
+ %1 = tail call half @llvm.fma.f16 (half %extract , half %0 , half %a ) ; %extract * (-%b) + %a (multiplicands swapped)
290
+ ret half %1
291
+ }
292
+
293
+ define dso_local half @t_vfmsh_laneq_f16_7 (half %a , half %b , <8 x half > %c , i32 %lane ) { ; Scalar fp16 fused multiply-subtract using lane 7 (the last lane) of a 128-bit vector; the expected output is the by-element fmls form. %lane is not referenced by the body.
294
+ ; CHECK-LABEL: t_vfmsh_laneq_f16_7:
295
+ ; CHECK: // %bb.0: // %entry
296
+ ; CHECK-NEXT: fmls h0, h1, v2.h[7]
297
+ ; CHECK-NEXT: ret
298
+ entry:
299
+ %0 = fsub half 0xH8000, %b ; 0xH8000 is -0.0 in half precision, so this negates %b
300
+ %extract = extractelement <8 x half > %c , i32 7 ; element 7 of %c
301
+ %1 = tail call half @llvm.fma.f16 (half %0 , half %extract , half %a ) ; (-%b) * %extract + %a
302
+ ret half %1
303
+ }
304
+
209
305
define dso_local <4 x half > @t_vmul_laneq_f16 (<4 x half > %a , <8 x half > %b , i32 %lane ) {
210
306
; CHECK-LABEL: t_vmul_laneq_f16:
211
307
; CHECK: // %bb.0: // %entry
0 commit comments