
Commit 8191307

[X86] Prefer lowerVectorShuffleAsBitMask over using an avx512 masked operation when avx512bw/avx512vl is enabled.
This does require a constant pool load instead of loading an immediate into a GPR, moving to a k register and masking. But it's fewer instructions and more consistent with previous ISAs. It probably opens up more combine opportunities, as one of the test cases demonstrates. llvm-svn: 348018
1 parent: 4b5b0c0
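
For intuition behind the change: a blend whose second operand is all zeros keeps some lanes of the first operand and zeroes the rest, which is exactly an AND with a constant that is all-ones in the kept lanes and all-zeros elsewhere. That is the equivalence lowerVectorShuffleAsBitMask exploits, trading the movl/kmovd/masked-move sequence for a single vandps with a constant-pool operand. Below is a minimal standalone C++ sketch of that lane-by-lane equivalence (illustrative only, not LLVM code; the 8-byte width and the odd/even keep pattern are assumptions mirroring the first test case in the diff):

#include <cassert>
#include <cstdint>
#include <cstddef>

int main() {
  // Input lanes (scalar stand-in for a <32 x i8> vector value).
  uint8_t A[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8_t Blend[8], Masked[8];

  for (size_t I = 0; I != 8; ++I) {
    // Shuffle/blend form: odd lanes come from A, even lanes from zero.
    Blend[I] = (I % 2 == 1) ? A[I] : 0;
    // Bitmask form: AND with 0xFF for kept lanes, 0x00 for zeroed lanes.
    Masked[I] = A[I] & ((I % 2 == 1) ? 0xFF : 0x00);
  }

  // Both lowerings produce identical results lane-by-lane.
  for (size_t I = 0; I != 8; ++I)
    assert(Blend[I] == Masked[I]);
  return 0;
}
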

2 files changed: 10 additions, 19 deletions


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 5 deletions
@@ -10175,18 +10175,18 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
            "256-bit byte-blends require AVX2 support!");
 
+    // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
+    if (SDValue Masked =
+            lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+      return Masked;
+
     if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
       MVT IntegerType =
           MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
       SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
       return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
     }
 
-    // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
-    if (SDValue Masked =
-            lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
-      return Masked;
-
     // Scale the blend by the number of bytes per element.
     int Scale = VT.getScalarSizeInBits() / 8;

llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll

Lines changed: 5 additions & 14 deletions
@@ -1690,17 +1690,10 @@ define <32 x i8> @load_fold_pblendvb_commute(<32 x i8>* %px, <32 x i8> %y) {
 }
 
 define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31(<32 x i8> %a) {
-; AVX1OR2-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
-; AVX512VL-NEXT:    kmovd %eax, %k1
-; AVX512VL-NEXT:    vmovdqu8 %ymm0, %ymm0 {%k1} {z}
-; AVX512VL-NEXT:    retq
+; ALL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; ALL-NEXT:    retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
   ret <32 x i8> %shuffle
 }
@@ -2781,9 +2774,7 @@ define <32 x i8> @shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz
 ; AVX512VLBW-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
-; AVX512VLBW-NEXT:    movl $286331153, %eax # imm = 0x11111111
-; AVX512VLBW-NEXT:    kmovd %eax, %k1
-; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,u,u,u,9,u,u,u,10,u,u,u,11,u,u,u,28,u,u,u,29,u,u,u,30,u,u,u,31,u,u,u]
+; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
 ; AVX512VLBW-NEXT:    retq
 ;
 ; AVX512VLVBMI-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
