@@ -153,7 +153,11 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
153
153
| "__builtin_ia32_psrav16hi_mask"
154
154
| "__builtin_ia32_psrav8hi_mask"
155
155
| "__builtin_ia32_permvarhi256_mask"
156
- | "__builtin_ia32_permvarhi128_mask" => {
156
+ | "__builtin_ia32_permvarhi128_mask"
157
+ | "__builtin_ia32_maxph128_mask"
158
+ | "__builtin_ia32_maxph256_mask"
159
+ | "__builtin_ia32_minph128_mask"
160
+ | "__builtin_ia32_minph256_mask" => {
157
161
let mut new_args = args. to_vec ( ) ;
158
162
let arg3_type = gcc_func. get_param_type ( 2 ) ;
159
163
let vector_type = arg3_type. dyncast_vector ( ) . expect ( "vector type" ) ;
@@ -194,7 +198,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
194
198
| "__builtin_ia32_cvtqq2ps256_mask"
195
199
| "__builtin_ia32_cvtuqq2pd128_mask"
196
200
| "__builtin_ia32_cvtuqq2pd256_mask"
197
- | "__builtin_ia32_cvtuqq2ps256_mask" => {
201
+ | "__builtin_ia32_cvtuqq2ps256_mask"
202
+ | "__builtin_ia32_vcvtw2ph128_mask"
203
+ | "__builtin_ia32_vcvtw2ph256_mask"
204
+ | "__builtin_ia32_vcvtuw2ph128_mask"
205
+ | "__builtin_ia32_vcvtuw2ph256_mask"
206
+ | "__builtin_ia32_vcvtdq2ph256_mask"
207
+ | "__builtin_ia32_vcvtudq2ph256_mask" => {
198
208
let mut new_args = args. to_vec ( ) ;
199
209
// Remove last arg as it doesn't seem to be used in GCC and is always false.
200
210
new_args. pop ( ) ;
@@ -296,7 +306,8 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
296
306
"__builtin_ia32_vfmaddsubps512_mask"
297
307
| "__builtin_ia32_vfmaddsubpd512_mask"
298
308
| "__builtin_ia32_cmpsh_mask_round"
299
- | "__builtin_ia32_vfmaddph512_mask" => {
309
+ | "__builtin_ia32_vfmaddph512_mask"
310
+ | "__builtin_ia32_vfmaddsubph512_mask" => {
300
311
let mut new_args = args. to_vec ( ) ;
301
312
let last_arg = new_args. pop ( ) . expect ( "last arg" ) ;
302
313
let arg4_type = gcc_func. get_param_type ( 3 ) ;
@@ -319,9 +330,6 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
319
330
| "__builtin_ia32_vpermi2varpd128_mask"
320
331
| "__builtin_ia32_vpmadd52huq512_mask"
321
332
| "__builtin_ia32_vpmadd52luq512_mask"
322
- | "__builtin_ia32_vpmadd52huq256_mask"
323
- | "__builtin_ia32_vpmadd52luq256_mask"
324
- | "__builtin_ia32_vpmadd52huq128_mask"
325
333
| "__builtin_ia32_vfmaddsubph128_mask"
326
334
| "__builtin_ia32_vfmaddsubph256_mask" => {
327
335
let mut new_args = args. to_vec ( ) ;
@@ -405,7 +413,14 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
405
413
"__builtin_ia32_cvtqq2pd512_mask"
406
414
| "__builtin_ia32_cvtqq2ps512_mask"
407
415
| "__builtin_ia32_cvtuqq2pd512_mask"
408
- | "__builtin_ia32_cvtuqq2ps512_mask" => {
416
+ | "__builtin_ia32_cvtuqq2ps512_mask"
417
+ | "__builtin_ia32_sqrtph512_mask_round"
418
+ | "__builtin_ia32_vcvtw2ph512_mask_round"
419
+ | "__builtin_ia32_vcvtuw2ph512_mask_round"
420
+ | "__builtin_ia32_vcvtdq2ph512_mask_round"
421
+ | "__builtin_ia32_vcvtudq2ph512_mask_round"
422
+ | "__builtin_ia32_vcvtqq2ph512_mask_round"
423
+ | "__builtin_ia32_vcvtuqq2ph512_mask_round" => {
409
424
let mut old_args = args. to_vec ( ) ;
410
425
let mut new_args = vec ! [ ] ;
411
426
new_args. push ( old_args. swap_remove ( 0 ) ) ;
@@ -425,7 +440,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
425
440
"__builtin_ia32_addph512_mask_round"
426
441
| "__builtin_ia32_subph512_mask_round"
427
442
| "__builtin_ia32_mulph512_mask_round"
428
- | "__builtin_ia32_divph512_mask_round" => {
443
+ | "__builtin_ia32_divph512_mask_round"
444
+ | "__builtin_ia32_maxph512_mask_round"
445
+ | "__builtin_ia32_minph512_mask_round" => {
429
446
let mut new_args = args. to_vec ( ) ;
430
447
let last_arg = new_args. pop ( ) . expect ( "last arg" ) ;
431
448
@@ -460,7 +477,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
460
477
}
461
478
} else {
462
479
match func_name {
463
- "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
480
+ "__builtin_ia32_rndscaless_mask_round"
481
+ | "__builtin_ia32_rndscalesd_mask_round"
482
+ | "__builtin_ia32_reducesh_mask_round" => {
464
483
let new_args = args. to_vec ( ) ;
465
484
let arg3_type = gcc_func. get_param_type ( 2 ) ;
466
485
let arg3 = builder. context . new_cast ( None , new_args[ 4 ] , arg3_type) ;
@@ -585,6 +604,12 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
585
604
new_args[ 2 ] = builder. context . new_cast ( None , new_args[ 2 ] , builder. double_type ) ;
586
605
args = new_args. into ( ) ;
587
606
}
607
+ "__builtin_ia32_sqrtsh_mask_round" => {
608
+ // The first two arguments are inverted, so swap them.
609
+ let mut new_args = args. to_vec ( ) ;
610
+ new_args. swap ( 0 , 1 ) ;
611
+ args = new_args. into ( ) ;
612
+ }
588
613
_ => ( ) ,
589
614
}
590
615
}
@@ -1090,9 +1115,9 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
1090
1115
"llvm.x86.avx512.dbpsadbw.128" => "__builtin_ia32_dbpsadbw128_mask" ,
1091
1116
"llvm.x86.avx512.vpmadd52h.uq.512" => "__builtin_ia32_vpmadd52huq512_mask" ,
1092
1117
"llvm.x86.avx512.vpmadd52l.uq.512" => "__builtin_ia32_vpmadd52luq512_mask" ,
1093
- "llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256_mask " ,
1094
- "llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256_mask " ,
1095
- "llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128_mask " ,
1118
+ "llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256 " ,
1119
+ "llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256 " ,
1120
+ "llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128 " ,
1096
1121
"llvm.x86.avx512.vpdpwssd.512" => "__builtin_ia32_vpdpwssd_v16si" ,
1097
1122
"llvm.x86.avx512.vpdpwssd.256" => "__builtin_ia32_vpdpwssd_v8si" ,
1098
1123
"llvm.x86.avx512.vpdpwssd.128" => "__builtin_ia32_vpdpwssd_v4si" ,
@@ -1209,6 +1234,55 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
1209
1234
"llvm.x86.avx512fp16.vfmadd.f16" => "__builtin_ia32_vfmaddsh3_mask" ,
1210
1235
"llvm.x86.avx512fp16.vfmaddsub.ph.128" => "__builtin_ia32_vfmaddsubph128_mask" ,
1211
1236
"llvm.x86.avx512fp16.vfmaddsub.ph.256" => "__builtin_ia32_vfmaddsubph256_mask" ,
1237
+ "llvm.x86.avx512fp16.vfmaddsub.ph.512" => "__builtin_ia32_vfmaddsubph512_mask" ,
1238
+ "llvm.x86.avx512fp16.sqrt.ph.512" => "__builtin_ia32_sqrtph512_mask_round" ,
1239
+ "llvm.x86.avx512fp16.mask.sqrt.sh" => "__builtin_ia32_sqrtsh_mask_round" ,
1240
+ "llvm.x86.avx512fp16.max.ph.128" => "__builtin_ia32_maxph128_mask" ,
1241
+ "llvm.x86.avx512fp16.max.ph.256" => "__builtin_ia32_maxph256_mask" ,
1242
+ "llvm.x86.avx512fp16.max.ph.512" => "__builtin_ia32_maxph512_mask_round" ,
1243
+ "llvm.x86.avx512fp16.min.ph.128" => "__builtin_ia32_minph128_mask" ,
1244
+ "llvm.x86.avx512fp16.min.ph.256" => "__builtin_ia32_minph256_mask" ,
1245
+ "llvm.x86.avx512fp16.min.ph.512" => "__builtin_ia32_minph512_mask_round" ,
1246
+ "llvm.x86.avx512fp16.mask.getexp.sh" => "__builtin_ia32_getexpsh_mask_round" ,
1247
+ "llvm.x86.avx512fp16.mask.rndscale.ph.128" => "__builtin_ia32_rndscaleph128_mask" ,
1248
+ "llvm.x86.avx512fp16.mask.rndscale.ph.256" => "__builtin_ia32_rndscaleph256_mask" ,
1249
+ "llvm.x86.avx512fp16.mask.rndscale.ph.512" => "__builtin_ia32_rndscaleph512_mask_round" ,
1250
+ "llvm.x86.avx512fp16.mask.scalef.ph.512" => "__builtin_ia32_scalefph512_mask_round" ,
1251
+ "llvm.x86.avx512fp16.mask.reduce.ph.512" => "__builtin_ia32_reduceph512_mask_round" ,
1252
+ "llvm.x86.avx512fp16.mask.reduce.sh" => "__builtin_ia32_reducesh_mask_round" ,
1253
+ "llvm.x86.avx512.sitofp.round.v8f16.v8i16" => "__builtin_ia32_vcvtw2ph128_mask" ,
1254
+ "llvm.x86.avx512.sitofp.round.v16f16.v16i16" => "__builtin_ia32_vcvtw2ph256_mask" ,
1255
+ "llvm.x86.avx512.sitofp.round.v32f16.v32i16" => "__builtin_ia32_vcvtw2ph512_mask_round" ,
1256
+ "llvm.x86.avx512.uitofp.round.v8f16.v8u16" => "__builtin_ia32_vcvtuw2ph128_mask" ,
1257
+ "llvm.x86.avx512.uitofp.round.v16f16.v16u16" => "__builtin_ia32_vcvtuw2ph256_mask" ,
1258
+ "llvm.x86.avx512.uitofp.round.v32f16.v32u16" => "__builtin_ia32_vcvtuw2ph512_mask_round" ,
1259
+ "llvm.x86.avx512.sitofp.round.v8f16.v8i32" => "__builtin_ia32_vcvtdq2ph256_mask" ,
1260
+ "llvm.x86.avx512.sitofp.round.v16f16.v16i32" => "__builtin_ia32_vcvtdq2ph512_mask_round" ,
1261
+ "llvm.x86.avx512fp16.vcvtsi2sh" => "__builtin_ia32_vcvtsi2sh32_round" ,
1262
+ "llvm.x86.avx512.uitofp.round.v8f16.v8u32" => "__builtin_ia32_vcvtudq2ph256_mask" ,
1263
+ "llvm.x86.avx512.uitofp.round.v16f16.v16u32" => "__builtin_ia32_vcvtudq2ph512_mask_round" ,
1264
+ "llvm.x86.avx512fp16.vcvtusi2sh" => "__builtin_ia32_vcvtusi2sh32_round" ,
1265
+ "llvm.x86.avx512.sitofp.round.v8f16.v8i64" => "__builtin_ia32_vcvtqq2ph512_mask_round" ,
1266
+ "llvm.x86.avx512.uitofp.round.v8f16.v8u64" => "__builtin_ia32_vcvtuqq2ph512_mask_round" ,
1267
+ "llvm.x86.avx512fp16.mask.vcvtps2phx.512" => "__builtin_ia32_vcvtps2phx512_mask_round" ,
1268
+ "llvm.x86.avx512fp16.mask.vcvtpd2ph.512" => "__builtin_ia32_vcvtpd2ph512_mask_round" ,
1269
+ "llvm.x86.avx512fp16.mask.vcvtph2uw.512" => "__builtin_ia32_vcvtph2uw512_mask_round" ,
1270
+ "llvm.x86.avx512fp16.mask.vcvttph2w.512" => "__builtin_ia32_vcvttph2w512_mask_round" ,
1271
+ "llvm.x86.avx512fp16.mask.vcvttph2uw.512" => "__builtin_ia32_vcvttph2uw512_mask_round" ,
1272
+ "llvm.x86.avx512fp16.mask.vcvtph2dq.512" => "__builtin_ia32_vcvtph2dq512_mask_round" ,
1273
+ "llvm.x86.avx512fp16.vcvtsh2si32" => "__builtin_ia32_vcvtsh2si32_round" ,
1274
+ "llvm.x86.avx512fp16.mask.vcvtph2udq.512" => "__builtin_ia32_vcvtph2udq512_mask_round" ,
1275
+ "llvm.x86.avx512fp16.vcvtsh2usi32" => "__builtin_ia32_vcvtsh2usi32_round" ,
1276
+ "llvm.x86.avx512fp16.mask.vcvttph2dq.512" => "__builtin_ia32_vcvttph2dq512_mask_round" ,
1277
+ "llvm.x86.avx512fp16.vcvttsh2si32" => "__builtin_ia32_vcvttsh2si32_round" ,
1278
+ "llvm.x86.avx512fp16.mask.vcvttph2udq.512" => "__builtin_ia32_vcvttph2udq512_mask_round" ,
1279
+ "llvm.x86.avx512fp16.vcvttsh2usi32" => "__builtin_ia32_vcvttsh2usi32_round" ,
1280
+ "llvm.x86.avx512fp16.mask.vcvtph2qq.512" => "__builtin_ia32_vcvtph2qq512_mask_round" ,
1281
+ "llvm.x86.avx512fp16.mask.vcvtph2uqq.512" => "__builtin_ia32_vcvtph2uqq512_mask_round" ,
1282
+ "llvm.x86.avx512fp16.mask.vcvttph2qq.512" => "__builtin_ia32_vcvttph2qq512_mask_round" ,
1283
+ "llvm.x86.avx512fp16.mask.vcvttph2uqq.512" => "__builtin_ia32_vcvttph2uqq512_mask_round" ,
1284
+ "llvm.x86.avx512fp16.mask.vcvtph2psx.512" => "__builtin_ia32_vcvtph2psx512_mask_round" ,
1285
+ "llvm.x86.avx512fp16.mask.vcvtph2pd.512" => "__builtin_ia32_vcvtph2pd512_mask_round" ,
1212
1286
1213
1287
// TODO: support the tile builtins:
1214
1288
"llvm.x86.ldtilecfg" => "__builtin_trap" ,
0 commit comments