Open
Description
Naïve code (LLVM IR, followed by the assembly produced for znver5):
; Per 64-bit lane: result[i] = (%0[i] << 1) | (%0[i-1] >> 63) for i = 1..7,
; and result[0] = (%0[0] << 1) | 1. The low 1 bit of lane 0 is supplied by a
; separate `or` after the funnel shift.
define dso_local <8 x i64> @foo(<8 x i64> %0) local_unnamed_addr {
Entry:
; Shuffle indices 0-7 select from the first operand and 8-15 from the second,
; so index 7 picks the constant 0 and indices 8..14 pick %0[0..6]:
; %1 = <0, %0[0], %0[1], ..., %0[6]>.
%1 = shufflevector <8 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 0>, <8 x i64> %0, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
; fshl by 1 per lane: (%0[i] << 1) | (%1[i] >> 63). Lane 0 shifts in a 0 bit
; because %1[0] is 0.
%2 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %0, <8 x i64> %1, <8 x i64> splat (i64 1))
; Set bit 0 of lane 0 only; the other lanes are OR-ed with 0 and are unchanged.
%3 = or <8 x i64> %2, <i64 1, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
ret <8 x i64> %3
}
; Declaration of the funnel-shift-left intrinsic called by the functions in
; this report. NOTE(review): `#1` refers to an attribute group whose definition
; is not included in this excerpt.
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) #1
# Constant pool entry: the two bytes {1, 0}, widened below to the qword pair <1, 0>.
.LCPI0_1:
.byte 1
.byte 0
# Assembly shown for @foo: five instructions plus a constant load.
foo:
# Zero xmm1 (a VEX-encoded xmm write also clears the upper bits of zmm1).
vpxor xmm1, xmm1, xmm1
# Concatenate zmm0:zmm1 and shift right by 7 qwords:
# zmm1 = <0, zmm0[0], zmm0[1], ..., zmm0[6]>.
valignq zmm1, zmm0, zmm1, 7
# Per lane: zmm0[i] = (zmm0[i] << 1) | (zmm1[i] >> 63).
vpshldq zmm0, zmm0, zmm1, 1
# Load the two bytes and sign-extend each to a qword: low lanes = <1, 0>,
# remaining lanes of zmm1 are zeroed.
vpmovsxbq xmm1, word ptr [rip + .LCPI0_1]
# Set bit 0 of lane 0; all other lanes are OR-ed with 0.
vporq zmm0, zmm0, zmm1
ret
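Better (znver5): using INT64_MIN (only the top bit set) instead of 0 as the element shifted into lane 0 makes the funnel shift itself produce the low 1 bit, so the separate `or` and its constant load are no longer needed: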
; Per 64-bit lane: result[i] = (%0[i] << 1) | (%0[i-1] >> 63) for i = 1..7,
; and result[0] = (%0[0] << 1) | 1. The low 1 bit of lane 0 comes from the
; funnel shift itself, so no trailing `or` is present.
define dso_local <8 x i64> @bar(<8 x i64> %0) local_unnamed_addr {
Entry:
; Shuffle indices 0-7 select from the first operand and 8-15 from the second,
; so index 7 picks the constant -9223372036854775808 (only bit 63 set) and
; indices 8..14 pick %0[0..6]: %1 = <INT64_MIN, %0[0], %0[1], ..., %0[6]>.
%1 = shufflevector <8 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 -9223372036854775808>, <8 x i64> %0, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
; fshl by 1 per lane: (%0[i] << 1) | (%1[i] >> 63). For lane 0,
; %1[0] >> 63 is 1, which sets bit 0 of the result.
%2 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %0, <8 x i64> %1, <8 x i64> splat (i64 1))
ret <8 x i64> %2
}
# Constant pool entry: one qword with only bit 63 set.
.LCPI1_0:
.quad -9223372036854775808
# Assembly shown for @bar: two instructions, with the constant folded into a
# broadcast memory operand.
bar:
# The memory operand is broadcast to all 8 qwords ({1to8}). Concatenating
# zmm0:broadcast and shifting right by 7 qwords gives
# zmm1 = <0x8000000000000000, zmm0[0], zmm0[1], ..., zmm0[6]>.
valignq zmm1, zmm0, qword ptr [rip + .LCPI1_0]{1to8}, 7
# Per lane: zmm0[i] = (zmm0[i] << 1) | (zmm1[i] >> 63); lane 0 receives a 1
# in bit 0 from the constant's top bit.
vpshldq zmm0, zmm0, zmm1, 1
ret