You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[CostModel][X86] getCastInstrCost - improve CostKind adjustment when splitting src/dst types
Noticed in #90883 review - for non-Throughput costs, we weren't applying the split count to '0 or 1' cost value.
This still doesn't work well as many of the type legalizations are hidden so we don't have the split count, really we need to move a CostKindCosts based costs table, but that's going to be a lot of work :/
Copy file name to clipboardExpand all lines: llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+3-3Lines changed: 3 additions & 3 deletions
Original file line number
Diff line number
Diff line change
@@ -56,17 +56,17 @@ define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
56
56
;
57
57
; LATE-LABEL: 'umul'
58
58
; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
59
-
; LATE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
59
+
; LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
60
60
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
61
61
;
62
62
; SIZE-LABEL: 'umul'
63
63
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
64
-
; SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
64
+
; SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
65
65
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
66
66
;
67
67
; SIZE_LATE-LABEL: 'umul'
68
68
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
69
-
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
69
+
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
70
70
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
Copy file name to clipboardExpand all lines: llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll
+12-12Lines changed: 12 additions & 12 deletions
Original file line number
Diff line number
Diff line change
@@ -1883,13 +1883,13 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0)
1883
1883
1884
1884
define <16 x float> @test_gather_16f32_const_mask(ptr%base, <16 x i32> %ind) {
1885
1885
; SSE2-LABEL: 'test_gather_16f32_const_mask'
1886
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1886
+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1887
1887
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
1888
1888
; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
1889
1889
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
1890
1890
;
1891
1891
; SSE42-LABEL: 'test_gather_16f32_const_mask'
1892
-
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1892
+
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1893
1893
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
1894
1894
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
1895
1895
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
@@ -1927,13 +1927,13 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) {
1927
1927
1928
1928
define <16 x float> @test_gather_16f32_var_mask(ptr%base, <16 x i32> %ind, <16 x i1>%mask) {
1929
1929
; SSE2-LABEL: 'test_gather_16f32_var_mask'
1930
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1930
+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1931
1931
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
1932
1932
; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
1933
1933
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
1934
1934
;
1935
1935
; SSE42-LABEL: 'test_gather_16f32_var_mask'
1936
-
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1936
+
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1937
1937
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
1938
1938
; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
1939
1939
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
@@ -1971,13 +1971,13 @@ define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16
1971
1971
1972
1972
define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
1973
1973
; SSE2-LABEL: 'test_gather_16f32_ra_var_mask'
1974
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1974
+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1975
1975
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
1976
1976
; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
1977
1977
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
1978
1978
;
1979
1979
; SSE42-LABEL: 'test_gather_16f32_ra_var_mask'
1980
-
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1980
+
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
1981
1981
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
1982
1982
; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
1983
1983
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
@@ -2017,15 +2017,15 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
2017
2017
; SSE2-LABEL: 'test_gather_16f32_const_mask2'
2018
2018
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
2019
2019
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
2020
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
2020
+
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
2021
2021
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
2022
2022
; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
2023
2023
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
2024
2024
;
2025
2025
; SSE42-LABEL: 'test_gather_16f32_const_mask2'
2026
2026
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
2027
2027
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
2028
-
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
2028
+
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
2029
2029
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
2030
2030
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
2031
2031
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
@@ -2178,13 +2178,13 @@ define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) {
2178
2178
2179
2179
define <4 x float> @test_gather_4f32(ptr%ptr, <4 x i32> %ind, <4 x i1>%mask) {
2180
2180
; SSE2-LABEL: 'test_gather_4f32'
2181
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2181
+
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2182
2182
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
2183
2183
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
2184
2184
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
2185
2185
;
2186
2186
; SSE42-LABEL: 'test_gather_4f32'
2187
-
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2187
+
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2188
2188
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
2189
2189
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
2190
2190
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
@@ -2228,13 +2228,13 @@ define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) {
2228
2228
2229
2229
define <4 x float> @test_gather_4f32_const_mask(ptr%ptr, <4 x i32> %ind) {
2230
2230
; SSE2-LABEL: 'test_gather_4f32_const_mask'
2231
-
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2231
+
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2232
2232
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
2233
2233
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
2234
2234
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
2235
2235
;
2236
2236
; SSE42-LABEL: 'test_gather_4f32_const_mask'
2237
-
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2237
+
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
2238
2238
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
2239
2239
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
2240
2240
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
0 commit comments