[LoopVectorize] Add test case for minloc reduction #141556

madhur13490 · 2025-05-27T07:10:07Z

This patch adds a test case extracted from Polyhedron benchmark. https://fortran.uk/fortran-compiler-comparisons/the-polyhedron-solutions-benchmark-suite/

The test is specifically interesting for vectorizing min/max reduction pattern.

llvmbot · 2025-05-27T07:10:41Z

@llvm/pr-subscribers-llvm-transforms

Author: Madhur Amilkanthwar (madhur13490)

Changes

This patch adds a test case extracted from Polyhedron benchmark. https://fortran.uk/fortran-compiler-comparisons/the-polyhedron-solutions-benchmark-suite/

The test is specifically interesting for vectorizing min/max reduction pattern.

Full diff: https://github.com/llvm/llvm-project/pull/141556.diff

1 Files Affected:

(added) llvm/test/Transforms/LoopVectorize/last-min-index.ll (+84)

diff --git a/llvm/test/Transforms/LoopVectorize/last-min-index.ll b/llvm/test/Transforms/LoopVectorize/last-min-index.ll
new file mode 100644
index 0000000000000..f69145eafa74a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/last-min-index.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN
+
+; This test case is extracted from rnflow (fortran) benchmark in polyhedron benchmark suite.
+; The function minlst primarily takes two indices (i.e. range), scans backwards in the range
+; and returns the firstIV of the minimum value.
+
+define fastcc i32 @minlst(i32 %.0.val, i32 %.0.val1, ptr %.0.val3) {
+; CHECK-REV-MIN-LABEL: define internal fastcc i32 @_QFcptrf2Pminlst(
+; CHECK-REV-MIN-SAME: i32 [[DOT0_VAL:%.*]], i32 [[DOT0_VAL1:%.*]], ptr [[DOT0_VAL3:%.*]]) unnamed_addr {
+; CHECK-REV-MIN-NEXT:    [[TMP1:%.*]] = sext i32 [[DOT0_VAL]] to i64
+; CHECK-REV-MIN-NEXT:    [[TMP2:%.*]] = sub i32 0, [[DOT0_VAL1]]
+; CHECK-REV-MIN-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-REV-MIN-NEXT:    [[TMP4:%.*]] = add nsw i64 [[TMP1]], [[TMP3]]
+; CHECK-REV-MIN-NEXT:    [[TMP5:%.*]] = sub nsw i64 0, [[TMP4]]
+; CHECK-REV-MIN-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[DOT0_VAL3]], i64 -8
+; CHECK-REV-MIN-NEXT:    [[INVARIANT_GEP5:%.*]] = getelementptr i8, ptr [[DOT0_VAL3]], i64 -4
+; CHECK-REV-MIN-NEXT:    [[TMP6:%.*]] = icmp slt i64 [[TMP4]], 0
+; CHECK-REV-MIN-NEXT:    br i1 [[TMP6]], label %[[DOTLR_PH_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
+; CHECK-REV-MIN:       [[_LR_PH_PREHEADER:.*:]]
+; CHECK-REV-MIN-NEXT:    [[TMP7:%.*]] = sext i32 [[DOT0_VAL1]] to i64
+; CHECK-REV-MIN-NEXT:    br label %[[DOTLR_PH:.*]]
+; CHECK-REV-MIN:       [[_LR_PH:.*:]]
+; CHECK-REV-MIN-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP7]], %[[DOTLR_PH_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ]
+; CHECK-REV-MIN-NEXT:    [[TMP8:%.*]] = phi i64 [ [[TMP14:%.*]], %[[DOTLR_PH]] ], [ [[TMP5]], %[[DOTLR_PH_PREHEADER]] ]
+; CHECK-REV-MIN-NEXT:    [[DOT07:%.*]] = phi i32 [ [[DOT1:%.*]], %[[DOTLR_PH]] ], [ [[DOT0_VAL1]], %[[DOTLR_PH_PREHEADER]] ]
+; CHECK-REV-MIN-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-REV-MIN-NEXT:    [[GEP:%.*]] = getelementptr float, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]]
+; CHECK-REV-MIN-NEXT:    [[TMP9:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-REV-MIN-NEXT:    [[TMP10:%.*]] = sext i32 [[DOT07]] to i64
+; CHECK-REV-MIN-NEXT:    [[GEP6:%.*]] = getelementptr float, ptr [[INVARIANT_GEP5]], i64 [[TMP10]]
+; CHECK-REV-MIN-NEXT:    [[TMP11:%.*]] = load float, ptr [[GEP6]], align 4
+; CHECK-REV-MIN-NEXT:    [[TMP12:%.*]] = fcmp contract olt float [[TMP9]], [[TMP11]]
+; CHECK-REV-MIN-NEXT:    [[TMP13:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-REV-MIN-NEXT:    [[DOT1]] = select i1 [[TMP12]], i32 [[TMP13]], i32 [[DOT07]]
+; CHECK-REV-MIN-NEXT:    [[TMP14]] = add nsw i64 [[TMP8]], -1
+; CHECK-REV-MIN-NEXT:    [[TMP15:%.*]] = icmp sgt i64 [[TMP8]], 1
+; CHECK-REV-MIN-NEXT:    br i1 [[TMP15]], label %[[DOTLR_PH]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]]
+; CHECK-REV-MIN:       [[__CRIT_EDGE_LOOPEXIT:.*:]]
+; CHECK-REV-MIN-NEXT:    [[DOT1_LCSSA:%.*]] = phi i32 [ [[DOT1]], %[[DOTLR_PH]] ]
+; CHECK-REV-MIN-NEXT:    br [[DOT_CRIT_EDGE]]
+; CHECK-REV-MIN:       [[__CRIT_EDGE:.*:]]
+; CHECK-REV-MIN-NEXT:    [[DOT0_LCSSA:%.*]] = phi i32 [ [[DOT0_VAL1]], [[TMP0:%.*]] ], [ [[DOT1_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
+; CHECK-REV-MIN-NEXT:    ret i32 [[DOT0_LCSSA]]
+;
+  %1 = sext i32 %.0.val to i64
+  %2 = sub i32 0, %.0.val1
+  %3 = sext i32 %2 to i64
+  %4 = add nsw i64 %1, %3
+  %5 = sub nsw i64 0, %4
+  %invariant.gep = getelementptr i8, ptr %.0.val3, i64 -8
+  %invariant.gep5 = getelementptr i8, ptr %.0.val3, i64 -4
+  %6 = icmp slt i64 %4, 0
+  br i1 %6, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader:                                 ; preds = %0
+  %7 = sext i32 %.0.val1 to i64
+  br label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph.preheader, %.lr.ph
+  %indvars.iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]
+  %8 = phi i64 [ %14, %.lr.ph ], [ %5, %.lr.ph.preheader ]
+  %.07 = phi i32 [ %.1, %.lr.ph ], [ %.0.val1, %.lr.ph.preheader ]
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv
+  %9 = load float, ptr %gep, align 4
+  %10 = sext i32 %.07 to i64
+  %gep6 = getelementptr float, ptr %invariant.gep5, i64 %10
+  %11 = load float, ptr %gep6, align 4
+  %12 = fcmp contract olt float %9, %11
+  %13 = trunc nsw i64 %indvars.iv.next to i32
+  %.1 = select i1 %12, i32 %13, i32 %.07
+  %14 = add nsw i64 %8, -1
+  %15 = icmp sgt i64 %8, 1
+  br i1 %15, label %.lr.ph, label %._crit_edge.loopexit
+
+._crit_edge.loopexit:                             ; preds = %.lr.ph
+  %.1.lcssa = phi i32 [ %.1, %.lr.ph ]
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge.loopexit, %0
+  %.0.lcssa = phi i32 [ %.0.val1, %0 ], [ %.1.lcssa, %._crit_edge.loopexit ]
+  ret i32 %.0.lcssa
+}

madhur13490 · 2025-05-27T07:12:06Z

Related PRs: #141431 and #141467.

fhahn

Thamks for adding the test. Left some initial comments

fhahn · 2025-05-27T10:07:42Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN


This will need something like -force-vector-width=4 to make sure this gets vectorized if possible.

Suggested change

; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN

; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s

fhahn · 2025-05-27T10:08:04Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+  %1 = sext i32 %.0.val to i64
+  %2 = sub i32 0, %.0.val1
+  %3 = sext i32 %2 to i64
+  %4 = add nsw i64 %1, %3
+  %5 = sub nsw i64 0, %4
+  %invariant.gep = getelementptr i8, ptr %.0.val3, i64 -8
+  %invariant.gep5 = getelementptr i8, ptr %.0.val3, i64 -4
+  %6 = icmp slt i64 %4, 0
+  br i1 %6, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader:                                 ; preds = %0
+  %7 = sext i32 %.0.val1 to i64
+  br label %.lr.ph


I think all this could be simplified?

fhahn · 2025-05-27T10:08:16Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+  %7 = sext i32 %.0.val1 to i64
+  br label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph.preheader, %.lr.ph


Suggested change

.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph

loop:

fhahn · 2025-05-27T10:08:25Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+  br label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph.preheader, %.lr.ph
+  %indvars.iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]


Suggested change

%indvars.iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]

%iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]

fhahn · 2025-05-27T10:09:26Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+  %.07 = phi i32 [ %.1, %.lr.ph ], [ %.0.val1, %.lr.ph.preheader ]
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv
+  %9 = load float, ptr %gep, align 4
+  %10 = sext i32 %.07 to i64
+  %gep6 = getelementptr float, ptr %invariant.gep5, i64 %10
+  %11 = load float, ptr %gep6, align 4
+  %12 = fcmp contract olt float %9, %11
+  %13 = trunc nsw i64 %indvars.iv.next to i32
+  %.1 = select i1 %12, i32 %13, i32 %.07
+  %14 = add nsw i64 %8, -1
+  %15 = icmp sgt i64 %8, 1


would be good to use more descriptive names for some of the values, to help read-ability

madhur13490 · 2025-05-27T12:32:40Z

@fhahn Thanks for the review. I have addressed comments, I hope that the latest revision covers all!

madhur13490 · 2025-05-28T11:24:27Z

@fhahn or @Mel-Chen Would any of your pending patches cause vectorization of this test case? (I am having LoopIdiomVectorize based patch ready, and I should be upstreaming that soon, but wanted to check once.)

Mel-Chen · 2025-05-29T07:26:08Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN


Test with more configurations.

-force-vector-width=4 -force-vector-interleave=1

-force-vector-width=4 -force-vector-interleave=2

-force-vector-width=1 -force-vector-interleave=4

Mel-Chen · 2025-05-29T07:29:14Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+; The function minlst primarily takes two indices (i.e. range), scans backwards in the range
+; and returns the firstIV of the minimum value.
+
+define fastcc i32 @minlst(i32 %first_index, i32 %last_index, ptr %array) {


Do we really need fastcc?

Mel-Chen · 2025-05-29T07:32:29Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+  %iv = phi i64 [ %last_index_sext, %loop.preheader ], [ %iv.next, %loop ]
+  %dec_iv = phi i64 [ %dec, %loop ], [ %diff, %loop.preheader ]
+  %index = phi i32 [ %select, %loop ], [ %last_index, %loop.preheader ]


Align the format of phi nodes, either phi loop.preheader, loop or phi loop, loop.preheader.

Mel-Chen · 2025-05-29T07:34:45Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+._crit_edge.loopexit:                             ; preds = %loop
+  %select.lcssa = phi i32 [ %select, %loop ]
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge.loopexit, %0
+  %last_index_ret = phi i32 [ %last_index, %entry ], [ %select.lcssa, %._crit_edge.loopexit ]
+  ret i32 %last_index_ret
+}


Merge ._crit_edge.loopexit and ._crit_edge into one basic block.

Mel-Chen · 2025-05-29T07:42:27Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+  %first_ptr = getelementptr i8, ptr %array, i64 -8
+  %second_ptr = getelementptr i8, ptr %array, i64 -4


Could %first_ptr and %second_ptr be passed as arguments directly?

No, I'd like to preserve the structure.

Mel-Chen · 2025-05-29T08:12:28Z

llvm/test/Transforms/LoopVectorize/last-min-index.ll

+  %iv = phi i64 [ %last_index_sext, %loop.preheader ], [ %iv.next, %loop ]
+  %dec_iv = phi i64 [ %dec, %loop ], [ %diff, %loop.preheader ]


Do you really need two induction variable with same step?

Mel-Chen · 2025-05-29T08:27:23Z

Related PRs: #141431 and #141467.

No, I think this is #140451.
There is only one phi node involved the reduction.

This patch adds a test case extracted from Polyhedron benchmark. https://fortran.uk/fortran-compiler-comparisons/the-polyhedron-solutions-benchmark-suite/ The test is specifically interesting for vectorizing min/max reduction pattern.

madhur13490 · 2025-05-29T09:53:04Z

Hi @Mel-Chen Thanks for the review. I have addressed some of your comments. For others, I want to preserve the structure of the code generated by the Flang compiler. As said, this is extracted from Flang pipeline and particularly the IR is just before LoopIdiomVectorize pass, thus we have LCSSA structure preserved.

madhur13490 requested review from fhahn and Mel-Chen May 27, 2025 07:10

llvmbot added the llvm:transforms label May 27, 2025

fhahn reviewed May 27, 2025

View reviewed changes

Mel-Chen reviewed May 29, 2025

View reviewed changes

madhur13490 added 3 commits May 29, 2025 02:42

[LoopVectorize] Add test case for minloc reduction

9f0c7d4

This patch adds a test case extracted from Polyhedron benchmark. https://fortran.uk/fortran-compiler-comparisons/the-polyhedron-solutions-benchmark-suite/ The test is specifically interesting for vectorizing min/max reduction pattern.

Address review comments

f84894c

Address review comments

3cef654

madhur13490 force-pushed the add_minloc_test2 branch from 936d6ca to 3cef654 Compare May 29, 2025 09:50

		@@ -0,0 +1,84 @@
		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
		; RUN: opt -passes=loop-vectorize -S %s \| FileCheck %s --check-prefix=CHECK-REV-MIN

	; RUN: opt -passes=loop-vectorize -S %s \| FileCheck %s --check-prefix=CHECK-REV-MIN
	; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s \| FileCheck %s

	%indvars.iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]
	%iv = phi i64 [ %7, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]

		@@ -0,0 +1,86 @@
		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
		; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s \| FileCheck %s --check-prefix=CHECK-REV-MIN

		%first_ptr = getelementptr i8, ptr %array, i64 -8
		%second_ptr = getelementptr i8, ptr %array, i64 -4

		%iv = phi i64 [ %last_index_sext, %loop.preheader ], [ %iv.next, %loop ]
		%dec_iv = phi i64 [ %dec, %loop ], [ %diff, %loop.preheader ]

[LoopVectorize] Add test case for minloc reduction #141556

Are you sure you want to change the base?

[LoopVectorize] Add test case for minloc reduction #141556

Uh oh!

Conversation

madhur13490 commented May 27, 2025

Uh oh!

llvmbot commented May 27, 2025

Uh oh!

madhur13490 commented May 27, 2025

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

madhur13490 commented May 27, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

madhur13490 commented May 28, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Mel-Chen commented May 29, 2025

Uh oh!

madhur13490 commented May 29, 2025

Uh oh!

Uh oh!

madhur13490 commented May 27, 2025 •

edited

Loading