Skip to content

Commit f1de9d6

Browse files
committed
[X86] AVX512FP16 instructions enabling 2/6
Enable FP16 binary operator instructions.

Ref.: https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D105264
1 parent 45138f7 commit f1de9d6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed: 12282 additions (+12282) and 43 deletions (-43).

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1850,6 +1850,29 @@ TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "a
18501850
TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
18511851

18521852
// AVX512 fp16 intrinsics
1853+
TARGET_BUILTIN(__builtin_ia32_vcomish, "iV8xV8xIiIi", "ncV:128:", "avx512fp16")
1854+
TARGET_BUILTIN(__builtin_ia32_addph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
1855+
TARGET_BUILTIN(__builtin_ia32_subph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
1856+
TARGET_BUILTIN(__builtin_ia32_mulph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
1857+
TARGET_BUILTIN(__builtin_ia32_divph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
1858+
TARGET_BUILTIN(__builtin_ia32_maxph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
1859+
TARGET_BUILTIN(__builtin_ia32_minph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
1860+
1861+
TARGET_BUILTIN(__builtin_ia32_minph256, "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
1862+
TARGET_BUILTIN(__builtin_ia32_minph128, "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
1863+
TARGET_BUILTIN(__builtin_ia32_maxph256, "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
1864+
TARGET_BUILTIN(__builtin_ia32_maxph128, "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
1865+
1866+
TARGET_BUILTIN(__builtin_ia32_addsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
1867+
TARGET_BUILTIN(__builtin_ia32_divsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
1868+
TARGET_BUILTIN(__builtin_ia32_mulsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
1869+
TARGET_BUILTIN(__builtin_ia32_subsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
1870+
TARGET_BUILTIN(__builtin_ia32_maxsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
1871+
TARGET_BUILTIN(__builtin_ia32_minsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
1872+
TARGET_BUILTIN(__builtin_ia32_cmpph512_mask, "UiV32xV32xIiUiIi", "ncV:512:", "avx512fp16")
1873+
TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
1874+
TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
1875+
TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16")
18531876
TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16")
18541877
TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")
18551878

@@ -1886,12 +1909,24 @@ TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f")
18861909
TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f")
18871910
TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")
18881911
TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")
1912+
TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16")
1913+
TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
1914+
TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
18891915
TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f")
18901916
TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f")
1917+
TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph512, "xV32x", "ncV:512:", "avx512fp16")
1918+
TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
1919+
TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
18911920
TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f")
18921921
TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f")
1922+
TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph512, "xV32x", "ncV:512:", "avx512fp16")
1923+
TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
1924+
TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
18931925
TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f")
18941926
TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f")
1927+
TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph512, "xxV32x", "ncV:512:", "avx512fp16")
1928+
TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
1929+
TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
18951930
TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f")
18961931
TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f")
18971932
TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14137,28 +14137,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1413714137
return Builder.CreateCall(F, {Ops[0]});
1413814138
}
1413914139
case X86::BI__builtin_ia32_reduce_fadd_pd512:
14140-
case X86::BI__builtin_ia32_reduce_fadd_ps512: {
14140+
case X86::BI__builtin_ia32_reduce_fadd_ps512:
14141+
case X86::BI__builtin_ia32_reduce_fadd_ph512:
14142+
case X86::BI__builtin_ia32_reduce_fadd_ph256:
14143+
case X86::BI__builtin_ia32_reduce_fadd_ph128: {
1414114144
Function *F =
1414214145
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
1414314146
Builder.getFastMathFlags().setAllowReassoc();
1414414147
return Builder.CreateCall(F, {Ops[0], Ops[1]});
1414514148
}
1414614149
case X86::BI__builtin_ia32_reduce_fmul_pd512:
14147-
case X86::BI__builtin_ia32_reduce_fmul_ps512: {
14150+
case X86::BI__builtin_ia32_reduce_fmul_ps512:
14151+
case X86::BI__builtin_ia32_reduce_fmul_ph512:
14152+
case X86::BI__builtin_ia32_reduce_fmul_ph256:
14153+
case X86::BI__builtin_ia32_reduce_fmul_ph128: {
1414814154
Function *F =
1414914155
CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
1415014156
Builder.getFastMathFlags().setAllowReassoc();
1415114157
return Builder.CreateCall(F, {Ops[0], Ops[1]});
1415214158
}
1415314159
case X86::BI__builtin_ia32_reduce_fmax_pd512:
14154-
case X86::BI__builtin_ia32_reduce_fmax_ps512: {
14160+
case X86::BI__builtin_ia32_reduce_fmax_ps512:
14161+
case X86::BI__builtin_ia32_reduce_fmax_ph512:
14162+
case X86::BI__builtin_ia32_reduce_fmax_ph256:
14163+
case X86::BI__builtin_ia32_reduce_fmax_ph128: {
1415514164
Function *F =
1415614165
CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
1415714166
Builder.getFastMathFlags().setNoNaNs();
1415814167
return Builder.CreateCall(F, {Ops[0]});
1415914168
}
1416014169
case X86::BI__builtin_ia32_reduce_fmin_pd512:
14161-
case X86::BI__builtin_ia32_reduce_fmin_ps512: {
14170+
case X86::BI__builtin_ia32_reduce_fmin_ps512:
14171+
case X86::BI__builtin_ia32_reduce_fmin_ph512:
14172+
case X86::BI__builtin_ia32_reduce_fmin_ph256:
14173+
case X86::BI__builtin_ia32_reduce_fmin_ph128: {
1416214174
Function *F =
1416314175
CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
1416414176
Builder.getFastMathFlags().setNoNaNs();
@@ -14422,6 +14434,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1442214434
case X86::BI__builtin_ia32_cmpordps:
1442314435
case X86::BI__builtin_ia32_cmpordpd:
1442414436
return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
14437+
case X86::BI__builtin_ia32_cmpph128_mask:
14438+
case X86::BI__builtin_ia32_cmpph256_mask:
14439+
case X86::BI__builtin_ia32_cmpph512_mask:
1442514440
case X86::BI__builtin_ia32_cmpps128_mask:
1442614441
case X86::BI__builtin_ia32_cmpps256_mask:
1442714442
case X86::BI__builtin_ia32_cmpps512_mask:

0 commit comments

Comments
 (0)