Skip to content

Commit cb36d78

Browse files
committed
Add more SIMD intrinsics
1 parent 17f3dbf commit cb36d78

File tree

4 files changed

+72
-4
lines changed

4 files changed

+72
-4
lines changed

src/base.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ pub fn compile_codegen_unit(
116116
context.add_command_line_option("-mavx");
117117
}
118118

119+
/*for feature in tcx.sess.opts.cg.target_feature.split(',') {
120+
println!("Feature: {}", feature);
121+
}*/
122+
119123
for arg in &tcx.sess.opts.cg.llvm_args {
120124
context.add_command_line_option(arg);
121125
}
@@ -218,6 +222,7 @@ pub fn compile_codegen_unit(
218222

219223
// ... and now that we have everything pre-defined, fill out those definitions.
220224
for &(mono_item, _) in &mono_items {
225+
//println!("{:?}", mono_item);
221226
mono_item.define::<Builder<'_, '_, '_>>(&cx);
222227
}
223228

src/builder.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
270270
actual_val.dereference(self.location).to_rvalue()
271271
}
272272
} else {
273+
// FIXME: this condition seems wrong: it also passes when neither type is
274+
// a vector.
273275
assert!(
274276
(!expected_ty.is_vector() || actual_ty.is_vector())
275277
&& (expected_ty.is_vector() || !actual_ty.is_vector()),
@@ -283,6 +285,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
283285
);
284286
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
285287
// TODO: remove bitcast now that vector types can be compared?
288+
println!("Name: {}", func_name);
286289
self.bitcast(actual_val, expected_ty)
287290
}
288291
} else {

src/declare.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,23 @@ fn declare_raw_fn<'gcc>(
168168
variadic: bool,
169169
) -> Function<'gcc> {
170170
if name.starts_with("llvm.") {
171-
let intrinsic = llvm::intrinsic(name, cx);
171+
let intrinsic = match name {
172+
"llvm.fma.f16" => {
173+
let param1 = cx.context.new_parameter(None, cx.double_type, "x");
174+
let param2 = cx.context.new_parameter(None, cx.double_type, "y");
175+
let param3 = cx.context.new_parameter(None, cx.double_type, "z");
176+
cx.context.new_function(
177+
None,
178+
FunctionType::Extern,
179+
cx.double_type,
180+
&[param1, param2, param3],
181+
"fma",
182+
false,
183+
)
184+
}
185+
_ => llvm::intrinsic(name, cx),
186+
};
187+
172188
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
173189
return intrinsic;
174190
}

src/intrinsic/llvm.rs

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::borrow::Cow;
22

3+
use gccjit::CType;
34
use gccjit::{Function, FunctionPtrType, RValue, ToRValue, UnaryOp};
45
use rustc_codegen_ssa::traits::BuilderMethods;
56

@@ -320,7 +321,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
320321
| "__builtin_ia32_vpmadd52luq512_mask"
321322
| "__builtin_ia32_vpmadd52huq256_mask"
322323
| "__builtin_ia32_vpmadd52luq256_mask"
323-
| "__builtin_ia32_vpmadd52huq128_mask" => {
324+
| "__builtin_ia32_vpmadd52huq128_mask"
325+
| "__builtin_ia32_vfmaddsubph128_mask"
326+
| "__builtin_ia32_vfmaddsubph256_mask" => {
324327
let mut new_args = args.to_vec();
325328
let arg4_type = gcc_func.get_param_type(3);
326329
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
@@ -440,6 +443,19 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
440443
new_args.push(last_arg);
441444
args = new_args.into();
442445
}
446+
// NOTE: the LLVM intrinsics receive 3 floats, but the GCC builtin requires 3 vectors.
447+
"__builtin_ia32_vfmaddsh3_mask" => {
448+
let new_args = args.to_vec();
449+
let arg1_type = gcc_func.get_param_type(0);
450+
let arg2_type = gcc_func.get_param_type(1);
451+
let arg3_type = gcc_func.get_param_type(2);
452+
let arg5_type = gcc_func.get_param_type(4);
453+
let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 8]);
454+
let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 8]);
455+
let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 8]);
456+
let arg5 = builder.context.new_rvalue_from_int(arg5_type, 4);
457+
args = vec![a, b, c, new_args[3], arg5].into();
458+
}
443459
_ => (),
444460
}
445461
} else {
@@ -452,7 +468,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
452468
let arg4 = builder.context.new_bitcast(None, new_args[2], arg4_type);
453469
args = vec![new_args[0], new_args[1], arg3, arg4, new_args[3], new_args[5]].into();
454470
}
455-
// NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors.
471+
// NOTE: the LLVM intrinsics receive 3 floats, but the GCC builtin requires 3 vectors.
456472
// FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC
457473
// intrinsic to avoid this.
458474
"__builtin_ia32_vfmaddss3_round" => {
@@ -550,6 +566,25 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
550566
]
551567
.into();
552568
}
569+
"__builtin_ia32_rndscalesh_mask_round" => {
570+
let new_args = args.to_vec();
571+
args = vec![
572+
new_args[0],
573+
new_args[1],
574+
new_args[4],
575+
new_args[2],
576+
new_args[3],
577+
new_args[5],
578+
]
579+
.into();
580+
}
581+
"fma" => {
582+
let mut new_args = args.to_vec();
583+
new_args[0] = builder.context.new_cast(None, new_args[0], builder.double_type);
584+
new_args[1] = builder.context.new_cast(None, new_args[1], builder.double_type);
585+
new_args[2] = builder.context.new_cast(None, new_args[2], builder.double_type);
586+
args = new_args.into();
587+
}
553588
_ => (),
554589
}
555590
}
@@ -566,7 +601,9 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
566601
orig_args: &[RValue<'gcc>],
567602
) -> RValue<'gcc> {
568603
match func_name {
569-
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
604+
"__builtin_ia32_vfmaddss3_round"
605+
| "__builtin_ia32_vfmaddsd3_round"
606+
| "__builtin_ia32_vfmaddsh3_mask" => {
570607
#[cfg(feature = "master")]
571608
{
572609
let zero = builder.context.new_rvalue_zero(builder.int_type);
@@ -625,6 +662,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
625662
&[random_number, success_variable.to_rvalue()],
626663
);
627664
}
665+
"fma" => {
666+
let f16_type = builder.context.new_c_type(CType::Float16);
667+
return_value = builder.context.new_cast(None, return_value, f16_type);
668+
}
628669
_ => (),
629670
}
630671

@@ -1165,6 +1206,9 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
11651206
"llvm.x86.avx512.mask.store.q.128" => "__builtin_ia32_movdqa64store128_mask",
11661207
"llvm.x86.avx512.mask.store.ps.128" => "__builtin_ia32_storeaps128_mask",
11671208
"llvm.x86.avx512.mask.store.pd.128" => "__builtin_ia32_storeapd128_mask",
1209+
"llvm.x86.avx512fp16.vfmadd.f16" => "__builtin_ia32_vfmaddsh3_mask",
1210+
"llvm.x86.avx512fp16.vfmaddsub.ph.128" => "__builtin_ia32_vfmaddsubph128_mask",
1211+
"llvm.x86.avx512fp16.vfmaddsub.ph.256" => "__builtin_ia32_vfmaddsubph256_mask",
11681212

11691213
// TODO: support the tile builtins:
11701214
"llvm.x86.ldtilecfg" => "__builtin_trap",

0 commit comments

Comments
 (0)