Skip to content

Commit c293ec4

Browse files
committed
update gpu pipeline
1 parent 9e77f3e commit c293ec4

File tree

1 file changed

+21
-18
lines changed

1 file changed

+21
-18
lines changed

src/jit/mlir.cpp

Lines changed: 21 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -728,43 +728,46 @@ static const std::string gpu_pipeline =
728728
"linalg-fuse-elementwise-ops,"
729729
"arith-expand,"
730730
"memref-expand,"
731-
"arith-bufferize,"
732-
"func-bufferize,"
733731
"func.func(empty-tensor-to-alloc-tensor),"
734-
"func.func(scf-bufferize),"
735-
"func.func(tensor-bufferize),"
736-
"func.func(bufferization-bufferize),"
737-
"func.func(linalg-bufferize),"
738-
"func.func(linalg-detensorize),"
739-
"func.func(tensor-bufferize),"
732+
"func.func(tile-for-gpu{tile-sizes=32 in-regions}),"
733+
"func.func(tile-for-gpu{tile-sizes=1 in-regions}),"
740734
"region-bufferize,"
741735
"canonicalize,"
742-
"func.func(finalizing-bufferize),"
736+
"one-shot-bufferize,"
737+
"cse,"
738+
"canonicalize,"
739+
"scf-forall-to-parallel,"
740+
"cse,"
741+
"canonicalize,"
743742
"imex-remove-temporaries,"
744-
"func.func(convert-linalg-to-parallel-loops),"
745-
"func.func(scf-parallel-loop-fusion),"
746-
// is add-outer-parallel-loop needed?
747-
"func.func(imex-add-outer-parallel-loop),"
743+
"buffer-deallocation-pipeline,"
744+
"func.func(convert-linalg-to-loops),"
748745
"func.func(gpu-map-parallel-loops),"
749-
"func.func(convert-parallel-loops-to-gpu),"
746+
"convert-parallel-loops-to-gpu,"
747+
"canonicalize,"
748+
"cse,"
750749
"func.func(insert-gpu-allocs{in-regions=1}),"
751750
"func.func(insert-gpu-copy),"
752751
"drop-regions,"
753752
"canonicalize,"
754-
"func.func(lower-affine),"
755753
"gpu-kernel-outlining,"
754+
"convert-scf-to-cf,"
755+
"convert-cf-to-llvm,"
756756
"canonicalize,"
757757
"cse,"
758-
"gpu.module(strip-debuginfo,convert-gpu-to-nvvm),"
759-
"nvvm-attach-target,"
758+
"gpu.module(strip-debuginfo,"
759+
"convert-gpu-to-nvvm),"
760+
"nvvm-attach-target{chip=sm_80 O=3},"
760761
"func.func(gpu-async-region),"
761762
"expand-strided-metadata,"
762763
"lower-affine,"
763764
"gpu-to-llvm,"
764-
"gpu-module-to-binary{format=fatbin},"
765765
"convert-func-to-llvm,"
766766
"convert-math-to-llvm,"
767767
"finalize-memref-to-llvm,"
768+
"canonicalize,"
769+
"cse,"
770+
"gpu-module-to-binary{format=fatbin},"
768771
"reconcile-unrealized-casts";
769772

770773
const std::string _passes(get_text_env("SHARPY_PASSES"));

0 commit comments

Comments
 (0)