Skip to content

Commit 88445ee

Browse files
committed
Add missing SIMD intrinsics
1 parent cb36d78 commit 88445ee

File tree

4 files changed: 88 additions and 16 deletions.

libgccjit.version

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
a0cb76246d8d00ed9847d9874e5d5658049c332d
1+
e744a9459d33864067214741daf5c5bc2a7b88c6

src/base.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,6 @@ pub fn compile_codegen_unit(
222222

223223
// ... and now that we have everything pre-defined, fill out those definitions.
224224
for &(mono_item, _) in &mono_items {
225-
//println!("{:?}", mono_item);
226225
mono_item.define::<Builder<'_, '_, '_>>(&cx);
227226
}
228227

src/builder.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
275275
assert!(
276276
(!expected_ty.is_vector() || actual_ty.is_vector())
277277
&& (expected_ty.is_vector() || !actual_ty.is_vector()),
278-
"{:?} ({}) -> {:?} ({}), index: {:?}[{}]",
278+
"{:?} (is vector: {}) -> {:?} (is vector: {}), Function: {:?}[{}]",
279279
actual_ty,
280280
actual_ty.is_vector(),
281281
expected_ty,
@@ -285,7 +285,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
285285
);
286286
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
287287
// TODO: remove bitcast now that vector types can be compared?
288-
println!("Name: {}", func_name);
289288
self.bitcast(actual_val, expected_ty)
290289
}
291290
} else {

src/intrinsic/llvm.rs

Lines changed: 86 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,11 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
153153
| "__builtin_ia32_psrav16hi_mask"
154154
| "__builtin_ia32_psrav8hi_mask"
155155
| "__builtin_ia32_permvarhi256_mask"
156-
| "__builtin_ia32_permvarhi128_mask" => {
156+
| "__builtin_ia32_permvarhi128_mask"
157+
| "__builtin_ia32_maxph128_mask"
158+
| "__builtin_ia32_maxph256_mask"
159+
| "__builtin_ia32_minph128_mask"
160+
| "__builtin_ia32_minph256_mask" => {
157161
let mut new_args = args.to_vec();
158162
let arg3_type = gcc_func.get_param_type(2);
159163
let vector_type = arg3_type.dyncast_vector().expect("vector type");
@@ -194,7 +198,13 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
194198
| "__builtin_ia32_cvtqq2ps256_mask"
195199
| "__builtin_ia32_cvtuqq2pd128_mask"
196200
| "__builtin_ia32_cvtuqq2pd256_mask"
197-
| "__builtin_ia32_cvtuqq2ps256_mask" => {
201+
| "__builtin_ia32_cvtuqq2ps256_mask"
202+
| "__builtin_ia32_vcvtw2ph128_mask"
203+
| "__builtin_ia32_vcvtw2ph256_mask"
204+
| "__builtin_ia32_vcvtuw2ph128_mask"
205+
| "__builtin_ia32_vcvtuw2ph256_mask"
206+
| "__builtin_ia32_vcvtdq2ph256_mask"
207+
| "__builtin_ia32_vcvtudq2ph256_mask" => {
198208
let mut new_args = args.to_vec();
199209
// Remove last arg as it doesn't seem to be used in GCC and is always false.
200210
new_args.pop();
@@ -296,7 +306,8 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
296306
"__builtin_ia32_vfmaddsubps512_mask"
297307
| "__builtin_ia32_vfmaddsubpd512_mask"
298308
| "__builtin_ia32_cmpsh_mask_round"
299-
| "__builtin_ia32_vfmaddph512_mask" => {
309+
| "__builtin_ia32_vfmaddph512_mask"
310+
| "__builtin_ia32_vfmaddsubph512_mask" => {
300311
let mut new_args = args.to_vec();
301312
let last_arg = new_args.pop().expect("last arg");
302313
let arg4_type = gcc_func.get_param_type(3);
@@ -319,9 +330,6 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
319330
| "__builtin_ia32_vpermi2varpd128_mask"
320331
| "__builtin_ia32_vpmadd52huq512_mask"
321332
| "__builtin_ia32_vpmadd52luq512_mask"
322-
| "__builtin_ia32_vpmadd52huq256_mask"
323-
| "__builtin_ia32_vpmadd52luq256_mask"
324-
| "__builtin_ia32_vpmadd52huq128_mask"
325333
| "__builtin_ia32_vfmaddsubph128_mask"
326334
| "__builtin_ia32_vfmaddsubph256_mask" => {
327335
let mut new_args = args.to_vec();
@@ -405,7 +413,14 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
405413
"__builtin_ia32_cvtqq2pd512_mask"
406414
| "__builtin_ia32_cvtqq2ps512_mask"
407415
| "__builtin_ia32_cvtuqq2pd512_mask"
408-
| "__builtin_ia32_cvtuqq2ps512_mask" => {
416+
| "__builtin_ia32_cvtuqq2ps512_mask"
417+
| "__builtin_ia32_sqrtph512_mask_round"
418+
| "__builtin_ia32_vcvtw2ph512_mask_round"
419+
| "__builtin_ia32_vcvtuw2ph512_mask_round"
420+
| "__builtin_ia32_vcvtdq2ph512_mask_round"
421+
| "__builtin_ia32_vcvtudq2ph512_mask_round"
422+
| "__builtin_ia32_vcvtqq2ph512_mask_round"
423+
| "__builtin_ia32_vcvtuqq2ph512_mask_round" => {
409424
let mut old_args = args.to_vec();
410425
let mut new_args = vec![];
411426
new_args.push(old_args.swap_remove(0));
@@ -425,7 +440,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
425440
"__builtin_ia32_addph512_mask_round"
426441
| "__builtin_ia32_subph512_mask_round"
427442
| "__builtin_ia32_mulph512_mask_round"
428-
| "__builtin_ia32_divph512_mask_round" => {
443+
| "__builtin_ia32_divph512_mask_round"
444+
| "__builtin_ia32_maxph512_mask_round"
445+
| "__builtin_ia32_minph512_mask_round" => {
429446
let mut new_args = args.to_vec();
430447
let last_arg = new_args.pop().expect("last arg");
431448

@@ -460,7 +477,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
460477
}
461478
} else {
462479
match func_name {
463-
"__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
480+
"__builtin_ia32_rndscaless_mask_round"
481+
| "__builtin_ia32_rndscalesd_mask_round"
482+
| "__builtin_ia32_reducesh_mask_round" => {
464483
let new_args = args.to_vec();
465484
let arg3_type = gcc_func.get_param_type(2);
466485
let arg3 = builder.context.new_cast(None, new_args[4], arg3_type);
@@ -585,6 +604,12 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
585604
new_args[2] = builder.context.new_cast(None, new_args[2], builder.double_type);
586605
args = new_args.into();
587606
}
607+
"__builtin_ia32_sqrtsh_mask_round" => {
608+
// The first two arguments are inverted, so swap them.
609+
let mut new_args = args.to_vec();
610+
new_args.swap(0, 1);
611+
args = new_args.into();
612+
}
588613
_ => (),
589614
}
590615
}
@@ -1090,9 +1115,9 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
10901115
"llvm.x86.avx512.dbpsadbw.128" => "__builtin_ia32_dbpsadbw128_mask",
10911116
"llvm.x86.avx512.vpmadd52h.uq.512" => "__builtin_ia32_vpmadd52huq512_mask",
10921117
"llvm.x86.avx512.vpmadd52l.uq.512" => "__builtin_ia32_vpmadd52luq512_mask",
1093-
"llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256_mask",
1094-
"llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256_mask",
1095-
"llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128_mask",
1118+
"llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256",
1119+
"llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256",
1120+
"llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128",
10961121
"llvm.x86.avx512.vpdpwssd.512" => "__builtin_ia32_vpdpwssd_v16si",
10971122
"llvm.x86.avx512.vpdpwssd.256" => "__builtin_ia32_vpdpwssd_v8si",
10981123
"llvm.x86.avx512.vpdpwssd.128" => "__builtin_ia32_vpdpwssd_v4si",
@@ -1209,6 +1234,55 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
12091234
"llvm.x86.avx512fp16.vfmadd.f16" => "__builtin_ia32_vfmaddsh3_mask",
12101235
"llvm.x86.avx512fp16.vfmaddsub.ph.128" => "__builtin_ia32_vfmaddsubph128_mask",
12111236
"llvm.x86.avx512fp16.vfmaddsub.ph.256" => "__builtin_ia32_vfmaddsubph256_mask",
1237+
"llvm.x86.avx512fp16.vfmaddsub.ph.512" => "__builtin_ia32_vfmaddsubph512_mask",
1238+
"llvm.x86.avx512fp16.sqrt.ph.512" => "__builtin_ia32_sqrtph512_mask_round",
1239+
"llvm.x86.avx512fp16.mask.sqrt.sh" => "__builtin_ia32_sqrtsh_mask_round",
1240+
"llvm.x86.avx512fp16.max.ph.128" => "__builtin_ia32_maxph128_mask",
1241+
"llvm.x86.avx512fp16.max.ph.256" => "__builtin_ia32_maxph256_mask",
1242+
"llvm.x86.avx512fp16.max.ph.512" => "__builtin_ia32_maxph512_mask_round",
1243+
"llvm.x86.avx512fp16.min.ph.128" => "__builtin_ia32_minph128_mask",
1244+
"llvm.x86.avx512fp16.min.ph.256" => "__builtin_ia32_minph256_mask",
1245+
"llvm.x86.avx512fp16.min.ph.512" => "__builtin_ia32_minph512_mask_round",
1246+
"llvm.x86.avx512fp16.mask.getexp.sh" => "__builtin_ia32_getexpsh_mask_round",
1247+
"llvm.x86.avx512fp16.mask.rndscale.ph.128" => "__builtin_ia32_rndscaleph128_mask",
1248+
"llvm.x86.avx512fp16.mask.rndscale.ph.256" => "__builtin_ia32_rndscaleph256_mask",
1249+
"llvm.x86.avx512fp16.mask.rndscale.ph.512" => "__builtin_ia32_rndscaleph512_mask_round",
1250+
"llvm.x86.avx512fp16.mask.scalef.ph.512" => "__builtin_ia32_scalefph512_mask_round",
1251+
"llvm.x86.avx512fp16.mask.reduce.ph.512" => "__builtin_ia32_reduceph512_mask_round",
1252+
"llvm.x86.avx512fp16.mask.reduce.sh" => "__builtin_ia32_reducesh_mask_round",
1253+
"llvm.x86.avx512.sitofp.round.v8f16.v8i16" => "__builtin_ia32_vcvtw2ph128_mask",
1254+
"llvm.x86.avx512.sitofp.round.v16f16.v16i16" => "__builtin_ia32_vcvtw2ph256_mask",
1255+
"llvm.x86.avx512.sitofp.round.v32f16.v32i16" => "__builtin_ia32_vcvtw2ph512_mask_round",
1256+
"llvm.x86.avx512.uitofp.round.v8f16.v8u16" => "__builtin_ia32_vcvtuw2ph128_mask",
1257+
"llvm.x86.avx512.uitofp.round.v16f16.v16u16" => "__builtin_ia32_vcvtuw2ph256_mask",
1258+
"llvm.x86.avx512.uitofp.round.v32f16.v32u16" => "__builtin_ia32_vcvtuw2ph512_mask_round",
1259+
"llvm.x86.avx512.sitofp.round.v8f16.v8i32" => "__builtin_ia32_vcvtdq2ph256_mask",
1260+
"llvm.x86.avx512.sitofp.round.v16f16.v16i32" => "__builtin_ia32_vcvtdq2ph512_mask_round",
1261+
"llvm.x86.avx512fp16.vcvtsi2sh" => "__builtin_ia32_vcvtsi2sh32_round",
1262+
"llvm.x86.avx512.uitofp.round.v8f16.v8u32" => "__builtin_ia32_vcvtudq2ph256_mask",
1263+
"llvm.x86.avx512.uitofp.round.v16f16.v16u32" => "__builtin_ia32_vcvtudq2ph512_mask_round",
1264+
"llvm.x86.avx512fp16.vcvtusi2sh" => "__builtin_ia32_vcvtusi2sh32_round",
1265+
"llvm.x86.avx512.sitofp.round.v8f16.v8i64" => "__builtin_ia32_vcvtqq2ph512_mask_round",
1266+
"llvm.x86.avx512.uitofp.round.v8f16.v8u64" => "__builtin_ia32_vcvtuqq2ph512_mask_round",
1267+
"llvm.x86.avx512fp16.mask.vcvtps2phx.512" => "__builtin_ia32_vcvtps2phx512_mask_round",
1268+
"llvm.x86.avx512fp16.mask.vcvtpd2ph.512" => "__builtin_ia32_vcvtpd2ph512_mask_round",
1269+
"llvm.x86.avx512fp16.mask.vcvtph2uw.512" => "__builtin_ia32_vcvtph2uw512_mask_round",
1270+
"llvm.x86.avx512fp16.mask.vcvttph2w.512" => "__builtin_ia32_vcvttph2w512_mask_round",
1271+
"llvm.x86.avx512fp16.mask.vcvttph2uw.512" => "__builtin_ia32_vcvttph2uw512_mask_round",
1272+
"llvm.x86.avx512fp16.mask.vcvtph2dq.512" => "__builtin_ia32_vcvtph2dq512_mask_round",
1273+
"llvm.x86.avx512fp16.vcvtsh2si32" => "__builtin_ia32_vcvtsh2si32_round",
1274+
"llvm.x86.avx512fp16.mask.vcvtph2udq.512" => "__builtin_ia32_vcvtph2udq512_mask_round",
1275+
"llvm.x86.avx512fp16.vcvtsh2usi32" => "__builtin_ia32_vcvtsh2usi32_round",
1276+
"llvm.x86.avx512fp16.mask.vcvttph2dq.512" => "__builtin_ia32_vcvttph2dq512_mask_round",
1277+
"llvm.x86.avx512fp16.vcvttsh2si32" => "__builtin_ia32_vcvttsh2si32_round",
1278+
"llvm.x86.avx512fp16.mask.vcvttph2udq.512" => "__builtin_ia32_vcvttph2udq512_mask_round",
1279+
"llvm.x86.avx512fp16.vcvttsh2usi32" => "__builtin_ia32_vcvttsh2usi32_round",
1280+
"llvm.x86.avx512fp16.mask.vcvtph2qq.512" => "__builtin_ia32_vcvtph2qq512_mask_round",
1281+
"llvm.x86.avx512fp16.mask.vcvtph2uqq.512" => "__builtin_ia32_vcvtph2uqq512_mask_round",
1282+
"llvm.x86.avx512fp16.mask.vcvttph2qq.512" => "__builtin_ia32_vcvttph2qq512_mask_round",
1283+
"llvm.x86.avx512fp16.mask.vcvttph2uqq.512" => "__builtin_ia32_vcvttph2uqq512_mask_round",
1284+
"llvm.x86.avx512fp16.mask.vcvtph2psx.512" => "__builtin_ia32_vcvtph2psx512_mask_round",
1285+
"llvm.x86.avx512fp16.mask.vcvtph2pd.512" => "__builtin_ia32_vcvtph2pd512_mask_round",
12121286

12131287
// TODO: support the tile builtins:
12141288
"llvm.x86.ldtilecfg" => "__builtin_trap",

0 commit comments

Comments
 (0)