Skip to content

Commit 39de82e

Browse files
committed
[SLP] fix insertion point for min/max reduction
As discussed in D70148 (and caused a revert of the original commit): if we insert at the select, then we can produce invalid IR because the replacement for the compare may have uses before the select.
1 parent 41bac76 commit 39de82e

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6741,8 +6741,23 @@ class HorizontalReduction {
67416741
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
67426742
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
67436743

6744-
// Emit a reduction.
6745-
Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
6744+
auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
6745+
assert(isa<SelectInst>(RdxRootInst) &&
6746+
"Expected min/max reduction to have select root instruction");
6747+
Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
6748+
assert(isa<Instruction>(ScalarCond) &&
6749+
"Expected min/max reduction to have compare condition");
6750+
return cast<Instruction>(ScalarCond);
6751+
};
6752+
6753+
// Emit a reduction. For min/max, the root is a select, but the insertion
6754+
// point is the compare condition of that select.
6755+
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
6756+
if (ReductionData.isMinMax())
6757+
Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
6758+
else
6759+
Builder.SetInsertPoint(RdxRootInst);
6760+
67466761
Value *ReducedSubTree =
67476762
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
67486763
if (VectorizedTree) {

llvm/test/Transforms/SLPVectorizer/X86/reduction.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,13 @@ define i1 @bad_insertpoint_rdx([8 x i32]* %p) #0 {
131131
; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[P:%.*]], i64 0, i64 0
132132
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX22]] to <2 x i32>*
133133
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 16
134-
; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32
135134
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> undef, <2 x i32> <i32 1, i32 undef>
136135
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i32> [[TMP2]], [[RDX_SHUF]]
137136
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i32> [[TMP2]], <2 x i32> [[RDX_SHUF]]
138137
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RDX_MINMAX_SELECT]], i32 0
139138
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
140139
; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0
140+
; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32
141141
; CHECK-NEXT: [[CMP23_2:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT87]], [[OP_EXTRA]]
142142
; CHECK-NEXT: ret i1 [[CMP23_2]]
143143
;

0 commit comments

Comments
 (0)