Given the testcase

```rust
#![feature(repr_simd, intrinsics)]

extern "rust-intrinsic" {
    fn simd_bitmask<T, U>(v: T) -> U;
    fn simd_select_bitmask<T, U>(m: T, a: U, b: U) -> U;
}

#[repr(simd, packed)]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct i32x10([i32; 10]);

impl i32x10 {
    fn splat(x: i32) -> Self {
        Self([x; 10])
    }
}

pub fn main() {
    // Non-power-of-2 multi-byte mask.
    unsafe {
        let mask = i32x10([-1, -1, 0, -1, 0, 0, -1, 0, -1, 0]);
        let mask_bytes =
            if cfg!(target_endian = "little") { [0b01001011, 0b01] } else { [0b11, 0b01001010] };
        let selected2 = simd_select_bitmask::<[u8; 2], _>(
            mask_bytes,
            i32x10::splat(-1), // yes
            i32x10::splat(0),  // no
        );
        assert_eq!(selected2, mask);
    }
}
```
on a big-endian target, this passes with optimizations but fails without them. LLVM's optimizations correctly implement the semantics of the IR this generates (so the optimized program is trivial), but without optimizations all of the IR reaches the backend, and somewhere in the LLVM machine backend things go wrong.
See here for some analysis.
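
For reference, the bit order that `mask_bytes` encodes can be modeled in scalar code. The sketch below is hypothetical (`select_bitmask_ref` is not part of the intrinsics API): it assumes lane `i` maps to bit `i` of a 10-bit mask value on little endian and to bit `9 - i` on big endian, with the two mask bytes read in native byte order. Those assumptions reproduce the cfg-selected `mask_bytes` values in the testcase.

```rust
/// Hypothetical scalar reference for the 10-lane `simd_select_bitmask` call
/// above. Assumed bit order: little endian puts lane i at bit i of the mask
/// value, big endian puts lane i at bit 9 - i, and the `[u8; 2]` mask is the
/// native-endian byte representation of that 10-bit value.
fn select_bitmask_ref(mask: [u8; 2], yes: [i32; 10], no: [i32; 10]) -> [i32; 10] {
    const N: usize = 10;
    // Reassemble the two mask bytes into one integer in the target's byte order.
    let bits = u16::from_ne_bytes(mask);
    let mut out = [0i32; N];
    for lane in 0..N {
        let bit = if cfg!(target_endian = "little") { lane } else { N - 1 - lane };
        // Bit set: take the lane from `yes`; bit clear: take it from `no`.
        out[lane] = if (bits >> bit) & 1 == 1 { yes[lane] } else { no[lane] };
    }
    out
}
```

Under this model, both `mask_bytes` values select exactly the lanes that are `-1` in `mask`, which is what the `assert_eq!` expects; the failure would mean the unoptimized big-endian backend disagrees with this bit order.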