1
1
use std:: borrow:: Cow ;
2
2
3
+ use gccjit:: CType ;
3
4
use gccjit:: { Function , FunctionPtrType , RValue , ToRValue , UnaryOp } ;
4
5
use rustc_codegen_ssa:: traits:: BuilderMethods ;
5
6
@@ -320,7 +321,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
320
321
| "__builtin_ia32_vpmadd52luq512_mask"
321
322
| "__builtin_ia32_vpmadd52huq256_mask"
322
323
| "__builtin_ia32_vpmadd52luq256_mask"
323
- | "__builtin_ia32_vpmadd52huq128_mask" => {
324
+ | "__builtin_ia32_vpmadd52huq128_mask"
325
+ | "__builtin_ia32_vfmaddsubph128_mask"
326
+ | "__builtin_ia32_vfmaddsubph256_mask" => {
324
327
let mut new_args = args. to_vec ( ) ;
325
328
let arg4_type = gcc_func. get_param_type ( 3 ) ;
326
329
let minus_one = builder. context . new_rvalue_from_int ( arg4_type, -1 ) ;
@@ -440,6 +443,19 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
440
443
new_args. push ( last_arg) ;
441
444
args = new_args. into ( ) ;
442
445
}
446
+ // NOTE: the LLVM intrinsics receive 3 floats, but the GCC builtin requires 3 vectors.
447
+ "__builtin_ia32_vfmaddsh3_mask" => {
448
+ let new_args = args. to_vec ( ) ;
449
+ let arg1_type = gcc_func. get_param_type ( 0 ) ;
450
+ let arg2_type = gcc_func. get_param_type ( 1 ) ;
451
+ let arg3_type = gcc_func. get_param_type ( 2 ) ;
452
+ let arg5_type = gcc_func. get_param_type ( 4 ) ;
453
+ let a = builder. context . new_rvalue_from_vector ( None , arg1_type, & [ new_args[ 0 ] ; 8 ] ) ;
454
+ let b = builder. context . new_rvalue_from_vector ( None , arg2_type, & [ new_args[ 1 ] ; 8 ] ) ;
455
+ let c = builder. context . new_rvalue_from_vector ( None , arg3_type, & [ new_args[ 2 ] ; 8 ] ) ;
456
+ let arg5 = builder. context . new_rvalue_from_int ( arg5_type, 4 ) ;
457
+ args = vec ! [ a, b, c, new_args[ 3 ] , arg5] . into ( ) ;
458
+ }
443
459
_ => ( ) ,
444
460
}
445
461
} else {
@@ -452,7 +468,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
452
468
let arg4 = builder. context . new_bitcast ( None , new_args[ 2 ] , arg4_type) ;
453
469
args = vec ! [ new_args[ 0 ] , new_args[ 1 ] , arg3, arg4, new_args[ 3 ] , new_args[ 5 ] ] . into ( ) ;
454
470
}
455
- // NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors.
471
+ // NOTE: the LLVM intrinsics receive 3 floats, but the GCC builtin requires 3 vectors.
456
472
// FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC
457
473
// intrinsic to avoid this.
458
474
"__builtin_ia32_vfmaddss3_round" => {
@@ -550,6 +566,25 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
550
566
]
551
567
. into ( ) ;
552
568
}
569
+ "__builtin_ia32_rndscalesh_mask_round" => {
570
+ let new_args = args. to_vec ( ) ;
571
+ args = vec ! [
572
+ new_args[ 0 ] ,
573
+ new_args[ 1 ] ,
574
+ new_args[ 4 ] ,
575
+ new_args[ 2 ] ,
576
+ new_args[ 3 ] ,
577
+ new_args[ 5 ] ,
578
+ ]
579
+ . into ( ) ;
580
+ }
581
+ "fma" => {
582
+ let mut new_args = args. to_vec ( ) ;
583
+ new_args[ 0 ] = builder. context . new_cast ( None , new_args[ 0 ] , builder. double_type ) ;
584
+ new_args[ 1 ] = builder. context . new_cast ( None , new_args[ 1 ] , builder. double_type ) ;
585
+ new_args[ 2 ] = builder. context . new_cast ( None , new_args[ 2 ] , builder. double_type ) ;
586
+ args = new_args. into ( ) ;
587
+ }
553
588
_ => ( ) ,
554
589
}
555
590
}
@@ -566,7 +601,9 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
566
601
orig_args : & [ RValue < ' gcc > ] ,
567
602
) -> RValue < ' gcc > {
568
603
match func_name {
569
- "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
604
+ "__builtin_ia32_vfmaddss3_round"
605
+ | "__builtin_ia32_vfmaddsd3_round"
606
+ | "__builtin_ia32_vfmaddsh3_mask" => {
570
607
#[ cfg( feature = "master" ) ]
571
608
{
572
609
let zero = builder. context . new_rvalue_zero ( builder. int_type ) ;
@@ -625,6 +662,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
625
662
& [ random_number, success_variable. to_rvalue ( ) ] ,
626
663
) ;
627
664
}
665
+ "fma" => {
666
+ let f16_type = builder. context . new_c_type ( CType :: Float16 ) ;
667
+ return_value = builder. context . new_cast ( None , return_value, f16_type) ;
668
+ }
628
669
_ => ( ) ,
629
670
}
630
671
@@ -1165,6 +1206,9 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
1165
1206
"llvm.x86.avx512.mask.store.q.128" => "__builtin_ia32_movdqa64store128_mask" ,
1166
1207
"llvm.x86.avx512.mask.store.ps.128" => "__builtin_ia32_storeaps128_mask" ,
1167
1208
"llvm.x86.avx512.mask.store.pd.128" => "__builtin_ia32_storeapd128_mask" ,
1209
+ "llvm.x86.avx512fp16.vfmadd.f16" => "__builtin_ia32_vfmaddsh3_mask" ,
1210
+ "llvm.x86.avx512fp16.vfmaddsub.ph.128" => "__builtin_ia32_vfmaddsubph128_mask" ,
1211
+ "llvm.x86.avx512fp16.vfmaddsub.ph.256" => "__builtin_ia32_vfmaddsubph256_mask" ,
1168
1212
1169
1213
// TODO: support the tile builtins:
1170
1214
"llvm.x86.ldtilecfg" => "__builtin_trap" ,
0 commit comments