Skip to content

Commit 5522d24

Browse files
authored
[flang][cuda] Allow AbstractResult to run in gpu.module (#118529)
In CUDA Fortran, device functions are converted to `gpu.func` inside the `gpu.module` operation. Update the AbstractResult pass to be able to run on `func.func` and `gpu.func` operations inside the `gpu.module`.
1 parent c806042 commit 5522d24

File tree

7 files changed

+82
-20
lines changed

7 files changed

+82
-20
lines changed

flang/include/flang/Optimizer/Passes/Pipelines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "flang/Tools/CrossToolHelpers.h"
2121
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
2222
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
23+
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
2324
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
2425
#include "mlir/Pass/PassManager.h"
2526
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ namespace fir {
1616
void addNestedPassToAllTopLevelOperations(mlir::PassManager &pm,
1717
PassConstructor ctor) {
1818
addNestedPassToOps<mlir::func::FuncOp, mlir::omp::DeclareReductionOp,
19-
mlir::omp::PrivateClauseOp, fir::GlobalOp>(pm, ctor);
19+
mlir::omp::PrivateClauseOp, fir::GlobalOp,
20+
mlir::gpu::GPUModuleOp>(pm, ctor);
2021
}
2122

2223
void addNestedPassToAllTopLevelOperationsConditionally(

flang/lib/Optimizer/Transforms/AbstractResult.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
1515
#include "flang/Optimizer/Transforms/Passes.h"
1616
#include "mlir/Dialect/Func/IR/FuncOps.h"
17+
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
1718
#include "mlir/IR/Diagnostics.h"
1819
#include "mlir/Pass/Pass.h"
1920
#include "mlir/Pass/PassManager.h"
@@ -331,9 +332,10 @@ class AbstractResultOpt
331332
using fir::impl::AbstractResultOptBase<
332333
AbstractResultOpt>::AbstractResultOptBase;
333334

334-
void runOnSpecificOperation(mlir::func::FuncOp func, bool shouldBoxResult,
335-
mlir::RewritePatternSet &patterns,
336-
mlir::ConversionTarget &target) {
335+
template <typename OpTy>
336+
void runOnFunctionLikeOperation(OpTy func, bool shouldBoxResult,
337+
mlir::RewritePatternSet &patterns,
338+
mlir::ConversionTarget &target) {
337339
auto loc = func.getLoc();
338340
auto *context = &getContext();
339341
// Convert function type itself if it has an abstract result.
@@ -384,6 +386,18 @@ class AbstractResultOpt
384386
}
385387
}
386388

389+
void runOnSpecificOperation(mlir::func::FuncOp func, bool shouldBoxResult,
390+
mlir::RewritePatternSet &patterns,
391+
mlir::ConversionTarget &target) {
392+
runOnFunctionLikeOperation(func, shouldBoxResult, patterns, target);
393+
}
394+
395+
void runOnSpecificOperation(mlir::gpu::GPUFuncOp func, bool shouldBoxResult,
396+
mlir::RewritePatternSet &patterns,
397+
mlir::ConversionTarget &target) {
398+
runOnFunctionLikeOperation(func, shouldBoxResult, patterns, target);
399+
}
400+
387401
inline static bool containsFunctionTypeWithAbstractResult(mlir::Type type) {
388402
return mlir::TypeSwitch<mlir::Type, bool>(type)
389403
.Case([](fir::BoxProcType boxProc) {
@@ -448,6 +462,14 @@ class AbstractResultOpt
448462
mlir::TypeSwitch<mlir::Operation *, void>(op)
449463
.Case<mlir::func::FuncOp, fir::GlobalOp>([&](auto op) {
450464
runOnSpecificOperation(op, shouldBoxResult, patterns, target);
465+
})
466+
.Case<mlir::gpu::GPUModuleOp>([&](auto op) {
467+
auto gpuMod = mlir::dyn_cast<mlir::gpu::GPUModuleOp>(*op);
468+
for (auto funcOp : gpuMod.template getOps<mlir::func::FuncOp>())
469+
runOnSpecificOperation(funcOp, shouldBoxResult, patterns, target);
470+
for (auto gpuFuncOp : gpuMod.template getOps<mlir::gpu::GPUFuncOp>())
471+
runOnSpecificOperation(gpuFuncOp, shouldBoxResult, patterns,
472+
target);
451473
});
452474

453475
// Convert the calls and, if needed, the ReturnOp in the function body.

flang/test/Driver/bbc-mlir-pass-pipeline.f90

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717
! CHECK-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
1818
! CHECK-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
1919

20-
! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
20+
! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
2121
! CHECK-NEXT: 'fir.global' Pipeline
2222
! CHECK-NEXT: CharacterConversion
2323
! CHECK-NEXT: 'func.func' Pipeline
2424
! CHECK-NEXT: ArrayValueCopy
2525
! CHECK-NEXT: CharacterConversion
26+
! CHECK-NEXT: 'gpu.module' Pipeline
27+
! CHECK-NEXT: CharacterConversion
2628
! CHECK-NEXT: 'omp.declare_reduction' Pipeline
2729
! CHECK-NEXT: CharacterConversion
2830
! CHECK-NEXT: 'omp.private' Pipeline
@@ -48,13 +50,16 @@
4850
! CHECK-NEXT: PolymorphicOpConversion
4951
! CHECK-NEXT: AssumedRankOpConversion
5052

51-
! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
53+
! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
5254
! CHECK-NEXT: 'fir.global' Pipeline
5355
! CHECK-NEXT: StackReclaim
5456
! CHECK-NEXT: CFGConversion
5557
! CHECK-NEXT: 'func.func' Pipeline
5658
! CHECK-NEXT: StackReclaim
5759
! CHECK-NEXT: CFGConversion
60+
! CHECK-NEXT: 'gpu.module' Pipeline
61+
! CHECK-NEXT: StackReclaim
62+
! CHECK-NEXT: CFGConversion
5863
! CHECK-NEXT: 'omp.declare_reduction' Pipeline
5964
! CHECK-NEXT: StackReclaim
6065
! CHECK-NEXT: CFGConversion

flang/test/Driver/mlir-debug-pass-pipeline.f90

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@
2828
! ALL: Pass statistics report
2929

3030
! ALL: Fortran::lower::VerifierPass
31-
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
31+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
3232
! ALL-NEXT: 'fir.global' Pipeline
3333
! ALL-NEXT: InlineElementals
3434
! ALL-NEXT: 'func.func' Pipeline
3535
! ALL-NEXT: InlineElementals
36+
! ALL-NEXT: 'gpu.module' Pipeline
37+
! ALL-NEXT: InlineElementals
3638
! ALL-NEXT: 'omp.declare_reduction' Pipeline
3739
! ALL-NEXT: InlineElementals
3840
! ALL-NEXT: 'omp.private' Pipeline
@@ -49,12 +51,14 @@
4951
! ALL-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
5052
! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
5153

52-
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
54+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
5355
! ALL-NEXT: 'fir.global' Pipeline
5456
! ALL-NEXT: CharacterConversion
5557
! ALL-NEXT: 'func.func' Pipeline
5658
! ALL-NEXT: ArrayValueCopy
5759
! ALL-NEXT: CharacterConversion
60+
! ALL-NEXT: 'gpu.module' Pipeline
61+
! ALL-NEXT: CharacterConversion
5862
! ALL-NEXT: 'omp.declare_reduction' Pipeline
5963
! ALL-NEXT: CharacterConversion
6064
! ALL-NEXT: 'omp.private' Pipeline
@@ -78,13 +82,16 @@
7882
! ALL-NEXT: PolymorphicOpConversion
7983
! ALL-NEXT: AssumedRankOpConversion
8084

81-
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
85+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
8286
! ALL-NEXT: 'fir.global' Pipeline
8387
! ALL-NEXT: StackReclaim
8488
! ALL-NEXT: CFGConversion
8589
! ALL-NEXT: 'func.func' Pipeline
8690
! ALL-NEXT: StackReclaim
8791
! ALL-NEXT: CFGConversion
92+
! ALL-NEXT: 'gpu.module' Pipeline
93+
! ALL-NEXT: StackReclaim
94+
! ALL-NEXT: CFGConversion
8895
! ALL-NEXT: 'omp.declare_reduction' Pipeline
8996
! ALL-NEXT: StackReclaim
9097
! ALL-NEXT: CFGConversion
@@ -99,11 +106,13 @@
99106
! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
100107
! ALL-NEXT: BoxedProcedurePass
101108

102-
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
109+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
103110
! ALL-NEXT: 'fir.global' Pipeline
104111
! ALL-NEXT: AbstractResultOpt
105112
! ALL-NEXT: 'func.func' Pipeline
106113
! ALL-NEXT: AbstractResultOpt
114+
! ALL-NEXT: 'gpu.module' Pipeline
115+
! ALL-NEXT: AbstractResultOpt
107116
! ALL-NEXT: 'omp.declare_reduction' Pipeline
108117
! ALL-NEXT: AbstractResultOpt
109118
! ALL-NEXT: 'omp.private' Pipeline

flang/test/Driver/mlir-pass-pipeline.f90

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,16 @@
1616

1717
! ALL: Fortran::lower::VerifierPass
1818
! O2-NEXT: Canonicalizer
19-
! ALL: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
19+
! ALL: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
2020
! ALL-NEXT:'fir.global' Pipeline
2121
! O2-NEXT: SimplifyHLFIRIntrinsics
2222
! ALL: InlineElementals
2323
! ALL-NEXT:'func.func' Pipeline
2424
! O2-NEXT: SimplifyHLFIRIntrinsics
2525
! ALL: InlineElementals
26+
! ALL-NEXT:'gpu.module' Pipeline
27+
! O2-NEXT: SimplifyHLFIRIntrinsics
28+
! ALL: InlineElementals
2629
! ALL-NEXT:'omp.declare_reduction' Pipeline
2730
! O2-NEXT: SimplifyHLFIRIntrinsics
2831
! ALL: InlineElementals
@@ -33,11 +36,13 @@
3336
! O2-NEXT: CSE
3437
! O2-NEXT: (S) {{.*}} num-cse'd
3538
! O2-NEXT: (S) {{.*}} num-dce'd
36-
! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
39+
! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
3740
! O2-NEXT: 'fir.global' Pipeline
3841
! O2-NEXT: OptimizedBufferization
3942
! O2-NEXT: 'func.func' Pipeline
4043
! O2-NEXT: OptimizedBufferization
44+
! O2-NEXT: 'gpu.module' Pipeline
45+
! O2-NEXT: OptimizedBufferization
4146
! O2-NEXT: 'omp.declare_reduction' Pipeline
4247
! O2-NEXT: OptimizedBufferization
4348
! O2-NEXT: 'omp.private' Pipeline
@@ -54,12 +59,14 @@
5459
! ALL-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
5560
! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
5661

57-
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
62+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
5863
! ALL-NEXT: 'fir.global' Pipeline
5964
! ALL-NEXT: CharacterConversion
6065
! ALL-NEXT: 'func.func' Pipeline
6166
! ALL-NEXT: ArrayValueCopy
6267
! ALL-NEXT: CharacterConversion
68+
! ALL-NEXT: 'gpu.module' Pipeline
69+
! ALL-NEXT: CharacterConversion
6370
! ALL-NEXT: 'omp.declare_reduction' Pipeline
6471
! ALL-NEXT: CharacterConversion
6572
! ALL-NEXT: 'omp.private' Pipeline
@@ -86,13 +93,16 @@
8693
! ALL-NEXT: AssumedRankOpConversion
8794
! O2-NEXT: AddAliasTags
8895

89-
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
96+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
9097
! ALL-NEXT: 'fir.global' Pipeline
9198
! ALL-NEXT: StackReclaim
9299
! ALL-NEXT: CFGConversion
93100
! ALL-NEXT: 'func.func' Pipeline
94101
! ALL-NEXT: StackReclaim
95102
! ALL-NEXT: CFGConversion
103+
! ALL-NEXT: 'gpu.module' Pipeline
104+
! ALL-NEXT: StackReclaim
105+
! ALL-NEXT: CFGConversion
96106
! ALL-NEXT: 'omp.declare_reduction' Pipeline
97107
! ALL-NEXT: StackReclaim
98108
! ALL-NEXT: CFGConversion
@@ -108,11 +118,13 @@
108118
! ALL-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
109119
! ALL-NEXT: BoxedProcedurePass
110120

111-
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
121+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
112122
! ALL-NEXT: 'fir.global' Pipeline
113123
! ALL-NEXT: AbstractResultOpt
114124
! ALL-NEXT: 'func.func' Pipeline
115125
! ALL-NEXT: AbstractResultOpt
126+
! ALL-NEXT: 'gpu.module' Pipeline
127+
! ALL-NEXT: AbstractResultOpt
116128
! ALL-NEXT: 'omp.declare_reduction' Pipeline
117129
! ALL-NEXT: AbstractResultOpt
118130
! ALL-NEXT: 'omp.private' Pipeline

flang/test/Fir/basic-program.fir

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,16 @@ func.func @_QQmain() {
1717
// PASSES: Pass statistics report
1818

1919
// PASSES: Canonicalizer
20-
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
20+
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
2121
// PASSES-NEXT: 'fir.global' Pipeline
2222
// PASSES-NEXT: SimplifyHLFIRIntrinsics
2323
// PASSES-NEXT: InlineElementals
2424
// PASSES-NEXT: 'func.func' Pipeline
2525
// PASSES-NEXT: SimplifyHLFIRIntrinsics
2626
// PASSES-NEXT: InlineElementals
27+
// PASSES-NEXT: 'gpu.module' Pipeline
28+
// PASSES-NEXT: SimplifyHLFIRIntrinsics
29+
// PASSES-NEXT: InlineElementals
2730
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
2831
// PASSES-NEXT: SimplifyHLFIRIntrinsics
2932
// PASSES-NEXT: InlineElementals
@@ -34,11 +37,13 @@ func.func @_QQmain() {
3437
// PASSES-NEXT: CSE
3538
// PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
3639
// PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
37-
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
40+
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
3841
// PASSES-NEXT: 'fir.global' Pipeline
3942
// PASSES-NEXT: OptimizedBufferization
4043
// PASSES-NEXT: 'func.func' Pipeline
4144
// PASSES-NEXT: OptimizedBufferization
45+
// PASSES-NEXT: 'gpu.module' Pipeline
46+
// PASSES-NEXT: OptimizedBufferization
4247
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
4348
// PASSES-NEXT: OptimizedBufferization
4449
// PASSES-NEXT: 'omp.private' Pipeline
@@ -52,12 +57,14 @@ func.func @_QQmain() {
5257
// PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
5358
// PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
5459

55-
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
60+
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
5661
// PASSES-NEXT: 'fir.global' Pipeline
5762
// PASSES-NEXT: CharacterConversion
5863
// PASSES-NEXT: 'func.func' Pipeline
5964
// PASSES-NEXT: ArrayValueCopy
6065
// PASSES-NEXT: CharacterConversion
66+
// PASSES-NEXT: 'gpu.module' Pipeline
67+
// PASSES-NEXT: CharacterConversion
6168
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
6269
// PASSES-NEXT: CharacterConversion
6370
// PASSES-NEXT: 'omp.private' Pipeline
@@ -84,13 +91,16 @@ func.func @_QQmain() {
8491
// PASSES-NEXT: AssumedRankOpConversion
8592
// PASSES-NEXT: AddAliasTags
8693

87-
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
94+
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
8895
// PASSES-NEXT: 'fir.global' Pipeline
8996
// PASSES-NEXT: StackReclaim
9097
// PASSES-NEXT: CFGConversion
9198
// PASSES-NEXT: 'func.func' Pipeline
9299
// PASSES-NEXT: StackReclaim
93100
// PASSES-NEXT: CFGConversion
101+
// PASSES-NEXT: 'gpu.module' Pipeline
102+
// PASSES-NEXT: StackReclaim
103+
// PASSES-NEXT: CFGConversion
94104
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
95105
// PASSES-NEXT: StackReclaim
96106
// PASSES-NEXT: CFGConversion
@@ -106,11 +116,13 @@ func.func @_QQmain() {
106116
// PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
107117
// PASSES-NEXT: BoxedProcedurePass
108118

109-
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
119+
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
110120
// PASSES-NEXT: 'fir.global' Pipeline
111121
// PASSES-NEXT: AbstractResultOpt
112122
// PASSES-NEXT: 'func.func' Pipeline
113123
// PASSES-NEXT: AbstractResultOpt
124+
// PASSES-NEXT: 'gpu.module' Pipeline
125+
// PASSES-NEXT: AbstractResultOpt
114126
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
115127
// PASSES-NEXT: AbstractResultOpt
116128
// PASSES-NEXT: 'omp.private' Pipeline

0 commit comments

Comments
 (0)