-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LoopVectorize] Add test case for minloc reduction #141556
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Madhur Amilkanthwar (madhur13490) ChangesThis patch adds a test case extracted from Polyhedron benchmark. https://fortran.uk/fortran-compiler-comparisons/the-polyhedron-solutions-benchmark-suite/ The test is specifically interesting for vectorizing min/max reduction pattern. Full diff: https://github.com/llvm/llvm-project/pull/141556.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/last-min-index.ll b/llvm/test/Transforms/LoopVectorize/last-min-index.ll
new file mode 100644
index 0000000000000..f69145eafa74a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/last-min-index.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN
+
+; This test case is extracted from rnflow (fortran) benchmark in polyhedron benchmark suite.
+; The function minlst primarily takes two indices (i.e. range), scans backwards in the range
+; and returns the firstIV of the minimum value.
+
+define fastcc i32 @minlst(i32 %.0.val, i32 %.0.val1, ptr %.0.val3) {
+; CHECK-REV-MIN-LABEL: define internal fastcc i32 @_QFcptrf2Pminlst(
+; CHECK-REV-MIN-SAME: i32 [[DOT0_VAL:%.*]], i32 [[DOT0_VAL1:%.*]], ptr [[DOT0_VAL3:%.*]]) unnamed_addr {
+; CHECK-REV-MIN-NEXT: [[TMP1:%.*]] = sext i32 [[DOT0_VAL]] to i64
+; CHECK-REV-MIN-NEXT: [[TMP2:%.*]] = sub i32 0, [[DOT0_VAL1]]
+; CHECK-REV-MIN-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-REV-MIN-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP1]], [[TMP3]]
+; CHECK-REV-MIN-NEXT: [[TMP5:%.*]] = sub nsw i64 0, [[TMP4]]
+; CHECK-REV-MIN-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[DOT0_VAL3]], i64 -8
+; CHECK-REV-MIN-NEXT: [[INVARIANT_GEP5:%.*]] = getelementptr i8, ptr [[DOT0_VAL3]], i64 -4
+; CHECK-REV-MIN-NEXT: [[TMP6:%.*]] = icmp slt i64 [[TMP4]], 0
+; CHECK-REV-MIN-NEXT: br i1 [[TMP6]], label %[[DOTLR_PH_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
+; CHECK-REV-MIN: [[_LR_PH_PREHEADER:.*:]]
+; CHECK-REV-MIN-NEXT: [[TMP7:%.*]] = sext i32 [[DOT0_VAL1]] to i64
+; CHECK-REV-MIN-NEXT: br label %[[DOTLR_PH:.*]]
+; CHECK-REV-MIN: [[_LR_PH:.*:]]
+; CHECK-REV-MIN-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP7]], %[[DOTLR_PH_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ]
+; CHECK-REV-MIN-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP14:%.*]], %[[DOTLR_PH]] ], [ [[TMP5]], %[[DOTLR_PH_PREHEADER]] ]
+; CHECK-REV-MIN-NEXT: [[DOT07:%.*]] = phi i32 [ [[DOT1:%.*]], %[[DOTLR_PH]] ], [ [[DOT0_VAL1]], %[[DOTLR_PH_PREHEADER]] ]
+; CHECK-REV-MIN-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-REV-MIN-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]]
+; CHECK-REV-MIN-NEXT: [[TMP9:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-REV-MIN-NEXT: [[TMP10:%.*]] = sext i32 [[DOT07]] to i64
+; CHECK-REV-MIN-NEXT: [[GEP6:%.*]] = getelementptr float, ptr [[INVARIANT_GEP5]], i64 [[TMP10]]
+; CHECK-REV-MIN-NEXT: [[TMP11:%.*]] = load float, ptr [[GEP6]], align 4
+; CHECK-REV-MIN-NEXT: [[TMP12:%.*]] = fcmp contract olt float [[TMP9]], [[TMP11]]
+; CHECK-REV-MIN-NEXT: [[TMP13:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-REV-MIN-NEXT: [[DOT1]] = select i1 [[TMP12]], i32 [[TMP13]], i32 [[DOT07]]
+; CHECK-REV-MIN-NEXT: [[TMP14]] = add nsw i64 [[TMP8]], -1
+; CHECK-REV-MIN-NEXT: [[TMP15:%.*]] = icmp sgt i64 [[TMP8]], 1
+; CHECK-REV-MIN-NEXT: br i1 [[TMP15]], label %[[DOTLR_PH]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]]
+; CHECK-REV-MIN: [[__CRIT_EDGE_LOOPEXIT:.*:]]
+; CHECK-REV-MIN-NEXT: [[DOT1_LCSSA:%.*]] = phi i32 [ [[DOT1]], %[[DOTLR_PH]] ]
+; CHECK-REV-MIN-NEXT: br [[DOT_CRIT_EDGE]]
+; CHECK-REV-MIN: [[__CRIT_EDGE:.*:]]
+; CHECK-REV-MIN-NEXT: [[DOT0_LCSSA:%.*]] = phi i32 [ [[DOT0_VAL1]], [[TMP0:%.*]] ], [ [[DOT1_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
+; CHECK-REV-MIN-NEXT: ret i32 [[DOT0_LCSSA]]
+;
+ %1 = sext i32 %.0.val to i64
+ %2 = sub i32 0, %.0.val1
+ %3 = sext i32 %2 to i64
+ %4 = add nsw i64 %1, %3
+ %5 = sub nsw i64 0, %4
+ %invariant.gep = getelementptr i8, ptr %.0.val3, i64 -8
+ %invariant.gep5 = getelementptr i8, ptr %.0.val3, i64 -4
+ %6 = icmp slt i64 %4, 0
+ br i1 %6, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %0
+ %7 = sext i32 %.0.val1 to i64
+ br label %.lr.ph
+
+.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]
+ %8 = phi i64 [ %14, %.lr.ph ], [ %5, %.lr.ph.preheader ]
+ %.07 = phi i32 [ %.1, %.lr.ph ], [ %.0.val1, %.lr.ph.preheader ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv
+ %9 = load float, ptr %gep, align 4
+ %10 = sext i32 %.07 to i64
+ %gep6 = getelementptr float, ptr %invariant.gep5, i64 %10
+ %11 = load float, ptr %gep6, align 4
+ %12 = fcmp contract olt float %9, %11
+ %13 = trunc nsw i64 %indvars.iv.next to i32
+ %.1 = select i1 %12, i32 %13, i32 %.07
+ %14 = add nsw i64 %8, -1
+ %15 = icmp sgt i64 %8, 1
+ br i1 %15, label %.lr.ph, label %._crit_edge.loopexit
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ %.1.lcssa = phi i32 [ %.1, %.lr.ph ]
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %0
+ %.0.lcssa = phi i32 [ %.0.val1, %0 ], [ %.1.lcssa, %._crit_edge.loopexit ]
+ ret i32 %.0.lcssa
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thamks for adding the test. Left some initial comments
@@ -0,0 +1,84 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will need something like -force-vector-width=4
to make sure this gets vectorized if possible.
; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN | |
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s |
%1 = sext i32 %.0.val to i64 | ||
%2 = sub i32 0, %.0.val1 | ||
%3 = sext i32 %2 to i64 | ||
%4 = add nsw i64 %1, %3 | ||
%5 = sub nsw i64 0, %4 | ||
%invariant.gep = getelementptr i8, ptr %.0.val3, i64 -8 | ||
%invariant.gep5 = getelementptr i8, ptr %.0.val3, i64 -4 | ||
%6 = icmp slt i64 %4, 0 | ||
br i1 %6, label %.lr.ph.preheader, label %._crit_edge | ||
|
||
.lr.ph.preheader: ; preds = %0 | ||
%7 = sext i32 %.0.val1 to i64 | ||
br label %.lr.ph |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think all this could be simplified?
%7 = sext i32 %.0.val1 to i64 | ||
br label %.lr.ph | ||
|
||
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph | |
loop: |
br label %.lr.ph | ||
|
||
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph | ||
%indvars.iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%indvars.iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ] | |
%iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ] |
%.07 = phi i32 [ %.1, %.lr.ph ], [ %.0.val1, %.lr.ph.preheader ] | ||
%indvars.iv.next = add nsw i64 %indvars.iv, -1 | ||
%gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv | ||
%9 = load float, ptr %gep, align 4 | ||
%10 = sext i32 %.07 to i64 | ||
%gep6 = getelementptr float, ptr %invariant.gep5, i64 %10 | ||
%11 = load float, ptr %gep6, align 4 | ||
%12 = fcmp contract olt float %9, %11 | ||
%13 = trunc nsw i64 %indvars.iv.next to i32 | ||
%.1 = select i1 %12, i32 %13, i32 %.07 | ||
%14 = add nsw i64 %8, -1 | ||
%15 = icmp sgt i64 %8, 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
would be good to use more descriptive names for some of the values, to help read-ability
@fhahn Thanks for the review. I have addressed comments, I hope that the latest revision covers all! |
@@ -0,0 +1,86 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test with more configurations.
- -force-vector-width=4 -force-vector-interleave=1
- -force-vector-width=4 -force-vector-interleave=2
- -force-vector-width=1 -force-vector-interleave=4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
; The function minlst primarily takes two indices (i.e. range), scans backwards in the range | ||
; and returns the firstIV of the minimum value. | ||
|
||
define fastcc i32 @minlst(i32 %first_index, i32 %last_index, ptr %array) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really need fastcc
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.
%iv = phi i64 [ %last_index_sext, %loop.preheader ], [ %iv.next, %loop ] | ||
%dec_iv = phi i64 [ %dec, %loop ], [ %diff, %loop.preheader ] | ||
%index = phi i32 [ %select, %loop ], [ %last_index, %loop.preheader ] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Align the format of phi nodes, either phi loop.preheader, loop
or phi loop, loop.preheader
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
._crit_edge.loopexit: ; preds = %loop | ||
%select.lcssa = phi i32 [ %select, %loop ] | ||
br label %._crit_edge | ||
|
||
._crit_edge: ; preds = %._crit_edge.loopexit, %0 | ||
%last_index_ret = phi i32 [ %last_index, %entry ], [ %select.lcssa, %._crit_edge.loopexit ] | ||
ret i32 %last_index_ret | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Merge ._crit_edge.loopexit and ._crit_edge into one basic block.
%first_ptr = getelementptr i8, ptr %array, i64 -8 | ||
%second_ptr = getelementptr i8, ptr %array, i64 -4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could %first_ptr and %second_ptr be passed as arguments directly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I'd like to preserve the structure.
%iv = phi i64 [ %last_index_sext, %loop.preheader ], [ %iv.next, %loop ] | ||
%dec_iv = phi i64 [ %dec, %loop ], [ %diff, %loop.preheader ] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you really need two induction variable with same step?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.
This patch adds a test case extracted from Polyhedron benchmark. https://fortran.uk/fortran-compiler-comparisons/the-polyhedron-solutions-benchmark-suite/ The test is specifically interesting for vectorizing min/max reduction pattern.
936d6ca
to
3cef654
Compare
Hi @Mel-Chen Thanks for the review. I have addressed some of your comments. For others, I want to preserve the structure of the code generated by the Flang compiler. As said, this is extracted from Flang pipeline and particularly the IR is just before |
This patch adds a test case extracted from Polyhedron benchmark. https://fortran.uk/fortran-compiler-comparisons/the-polyhedron-solutions-benchmark-suite/
The test is specifically interesting for vectorizing min/max reduction pattern.