diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 95b6f4afb9..88dc7b865a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -56,6 +56,8 @@ jobs: os: ubuntu-latest - tuple: aarch64-unknown-linux-gnu os: ubuntu-latest + - tuple: aarch64_be-unknown-linux-gnu + os: ubuntu-latest - tuple: riscv64gc-unknown-linux-gnu os: ubuntu-latest - tuple: powerpc-unknown-linux-gnu @@ -125,6 +127,11 @@ jobs: tuple: aarch64-unknown-linux-gnu os: ubuntu-latest test_everything: true + - target: + tuple: aarch64_be-unknown-linux-gnu + os: ubuntu-latest + test_everything: true + build_std: true - target: tuple: armv7-unknown-linux-gnueabihf os: ubuntu-latest @@ -192,13 +199,16 @@ jobs: steps: - uses: actions/checkout@v4 - with: - submodules: recursive - name: Install Rust run: | rustup update nightly --no-self-update rustup default nightly - run: rustup target add ${{ matrix.target.tuple }} + if: matrix.build_std == '' + - run: | + rustup component add rust-src + echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV + if: matrix.build_std != '' - run: cargo generate-lockfile # Configure some env vars based on matrix configuration diff --git a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index d7be70843d..74f770556d 100644 --- a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -26,5 +26,5 @@ ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc" ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}" -ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/bin/aarch64-none-linux-gnu-objdump" +ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump" ENV STDARCH_TEST_SKIP_FEATURE=tme diff --git a/ci/run-docker.sh b/ci/run-docker.sh index af83a32eca..d7aa50a8c9 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -36,6 +36,7 @@ run() { --env NOSTD \ --env NORUN \ --env RUSTFLAGS \ + --env CARGO_UNSTABLE_BUILD_STD \ --volume "${HOME}/.cargo":/cargo \ --volume "$(rustc --print sysroot)":/rust:ro \ --volume "$(pwd)":/checkout:ro \ diff --git a/ci/run.sh b/ci/run.sh index f582602a8c..522b266176 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -187,7 +187,7 @@ case "${TARGET}" in --cppcompiler "${TEST_CXX_COMPILER}" \ --skip "${TEST_SKIP_INTRINSICS}" \ --target "${TARGET}" \ - --linker "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER}" \ + --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \ --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" ;; *) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 56e522a316..adf526452b 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -24470,6 +24470,7 @@ pub fn vrsrad_n_u64(a: u64, b: u64) -> u64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { @@ -24480,6 +24481,7 @@ pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { @@ -24490,6 +24492,7 @@ pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { @@ -24500,6 +24503,7 @@ pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { @@ -24510,6 +24514,7 @@ pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_ #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { @@ -24520,12 +24525,79 @@ pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { let x: uint32x2_t = vrsubhn_u64(b, c); unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } } +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let x: int8x8_t = vrsubhn_s16(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let x: int16x4_t = vrsubhn_s32(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let x: int32x2_t = vrsubhn_s64(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + let x: uint8x8_t = vrsubhn_u16(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + let x: uint16x4_t = vrsubhn_u32(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + let x: uint32x2_t = vrsubhn_u64(b, c); + unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +} #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"] #[inline] diff --git a/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs b/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs index 8306206895..9403855f00 100644 --- a/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs +++ b/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs @@ -19,6 +19,7 @@ macro_rules! test_vtbl { - table[$table_t:ident]: [$($table_v:expr),*] | $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* ) => { + #[cfg(target_endian = "little")] #[simd_test(enable = "neon")] unsafe fn $test_name() { // create table as array, and transmute it to @@ -168,6 +169,7 @@ macro_rules! test_vtbx { - ext[$ext_t:ident]: [$($ext_v:expr),*] | $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* ) => { + #[cfg(target_endian = "little")] #[simd_test(enable = "neon")] unsafe fn $test_name() { // create table as array, and transmute it to diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index 2d04f1cf34..e398db0e90 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -594,29 +594,17 @@ fn compare_outputs( )) .output(); - let rust = if target != "aarch64_be-unknown-linux-gnu" { - Command::new("sh") - .current_dir("rust_programs") - .arg("-c") - .arg(format!( + let rust = Command::new("sh") + .current_dir("rust_programs") + .arg("-c") + .arg(format!( "cargo {toolchain} run --target {target} --bin {intrinsic} --release", intrinsic = intrinsic.name, toolchain = toolchain, target = target - )) - .env("RUSTFLAGS", "-Cdebuginfo=0") - .output() - } else { - Command::new("sh") - .arg("-c") - .arg(format!( - "{runner} ./rust_programs/target/{target}/release/{intrinsic}", - runner = runner, - target = target, - intrinsic = intrinsic.name, - )) - .output() - }; + )) + .env("RUSTFLAGS", "-Cdebuginfo=0") + .output(); let (c, rust) = match (c, rust) { (Ok(c), Ok(rust)) => (c, rust), @@ -624,14 +612,21 @@ fn compare_outputs( }; if !c.status.success() { - error!("Failed to run C program for intrinsic {}", intrinsic.name); + error!( + "Failed to run C program for intrinsic {intrinsic}\nstdout: {stdout}\nstderr: {stderr}", + intrinsic = intrinsic.name, + stdout = std::str::from_utf8(&c.stdout).unwrap_or(""), + stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), + ); return Some(FailureReason::RunC(intrinsic.name.clone())); } if !rust.status.success() { error!( - "Failed to run rust program for intrinsic {}", - intrinsic.name + "Failed to run Rust program for intrinsic {intrinsic}\nstdout: {stdout}\nstderr: {stderr}", + intrinsic = intrinsic.name, + stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""), + stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), ); return Some(FailureReason::RunRust(intrinsic.name.clone())); } diff --git a/crates/simd-test-macro/src/lib.rs b/crates/simd-test-macro/src/lib.rs index 9397c0452e..18e4747d94 100644 --- a/crates/simd-test-macro/src/lib.rs +++ b/crates/simd-test-macro/src/lib.rs @@ -58,7 +58,7 @@ pub fn simd_test( { "i686" | "x86_64" | "i586" => "is_x86_feature_detected", "arm" | "armv7" => "is_arm_feature_detected", - "aarch64" | "arm64ec" => "is_aarch64_feature_detected", + "aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected", maybe_riscv if maybe_riscv.starts_with("riscv") => "is_riscv_feature_detected", "powerpc" | "powerpcle" => "is_powerpc_feature_detected", "powerpc64" | "powerpc64le" => "is_powerpc64_feature_detected", diff --git a/crates/std_detect/src/detect/os/linux/aarch64.rs b/crates/std_detect/src/detect/os/linux/aarch64.rs index 9caa785564..22a9cefff7 100644 --- a/crates/std_detect/src/detect/os/linux/aarch64.rs +++ b/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -399,6 +399,7 @@ impl AtHwcap { } } +#[cfg(target_endian = "little")] #[cfg(test)] mod tests { use super::*; diff --git a/crates/std_detect/src/detect/os/linux/auxvec.rs b/crates/std_detect/src/detect/os/linux/auxvec.rs index 4293df20da..c30379ff06 100644 --- a/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -290,6 +290,7 @@ mod tests { assert_eq!(v.hwcap2, 0); } } else if #[cfg(target_arch = "aarch64")] { + #[cfg(target_endian = "little")] #[test] fn linux_artificial_aarch64() { let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv"); @@ -298,6 +299,7 @@ mod tests { assert_eq!(v.hwcap, 0x0123456789abcdef); assert_eq!(v.hwcap2, 0x02468ace13579bdf); } + #[cfg(target_endian = "little")] #[test] fn linux_no_hwcap2_aarch64() { let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv"); diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 6d6d199847..d4d5f77512 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -59,6 +59,14 @@ neon-unstable-f16: &neon-unstable-f16 neon-unstable-feat-lut: &neon-unstable-feat-lut FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']] +# #[cfg(target_endian = "little")] +little-endian: &little-endian + FnCall: [cfg, ['target_endian = "little"']] + +# #[cfg(target_endian = "big")] +big-endian: &big-endian + FnCall: [cfg, ['target_endian = "big"']] + intrinsics: - name: "vaddd_{type}" doc: Add @@ -8906,6 +8914,7 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[3]}" attr: + - *little-endian - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]] - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] safety: safe @@ -8923,6 +8932,29 @@ intrinsics: - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]] - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]] + - name: "vrsubhn_high_{neon_type[1]}" + doc: "Rounding subtract returning high narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[3]}" + attr: + - *big-endian + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: safe + types: + - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - Let: + - x + - "{neon_type[0]}" + - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]] + - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]] + - name: "vcopy{neon_type[0].lane_nox}" doc: "Insert vector element from another vector element" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs index 827a402e3a..977b4b46ad 100644 --- a/crates/stdarch-test/src/lib.rs +++ b/crates/stdarch-test/src/lib.rs @@ -164,7 +164,15 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { // Original limit was 20 instructions, but ARM DSP Intrinsics // are exactly 20 instructions long. So, bump the limit to 22 // instead of adding here a long list of exceptions. - _ => 22, + _ => { + // aarch64_be may add reverse instructions which increases + // the number of instructions generated. + if cfg!(all(target_endian = "big", target_arch = "aarch64")) { + 32 + } else { + 22 + } + } }, |v| v.parse().unwrap(), );