Skip to content

Commit fe694b1

Browse files
authored
[libclc] Move mad_sat to CLC; optimize for vector types (llvm#125517)
This commit moves the mad_sat builtin to the CLC library. It also optimizes it for vector types by avoiding scalarization. To make that possible, the previous branch-based control flow has been rewritten as select-based code, which applies element-wise to vectors. The scalar versions use the same select-based form for simplicity.
1 parent 6dfe20d commit fe694b1

File tree

7 files changed

+142
-69
lines changed

7 files changed

+142
-69
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Header for __clc_mad_sat. It sets __CLC_FUNCTION and __CLC_BODY, includes
// the integer gentype.inc, and then undefines both macros again.
// NOTE(review): presumably gentype.inc expands ternary_decl.inc once per
// integer type to declare the overloads -- confirm against gentype.inc.
#ifndef __CLC_INTEGER_CLC_MAD_SAT_H__
2+
#define __CLC_INTEGER_CLC_MAD_SAT_H__
3+
4+
// Inputs consumed by the gentype.inc include below.
#define __CLC_FUNCTION __clc_mad_sat
5+
#define __CLC_BODY <clc/shared/ternary_decl.inc>
6+
7+
#include <clc/integer/gentype.inc>
8+
9+
// Clean up so the helper macros do not leak to code including this header.
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_INTEGER_CLC_MAD_SAT_H__

libclc/clc/include/clc/integer/definitions.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@
1313
#define SHRT_MAX 32767
1414
#define SHRT_MIN (-32767 - 1)
1515
#define UCHAR_MAX 255
16+
#define UCHAR_MIN 0
1617
#define USHRT_MAX 65535
18+
#define USHRT_MIN 0
1719
#define UINT_MAX 0xffffffff
20+
#define UINT_MIN 0
1821
#define ULONG_MAX 0xffffffffffffffffUL
22+
#define ULONG_MIN 0UL
1923

2024
#endif // __CLC_INTEGER_DEFINITIONS_H__

libclc/clc/lib/clspv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
../generic/integer/clc_clz.cl
33
../generic/integer/clc_hadd.cl
44
../generic/integer/clc_mad24.cl
5+
../generic/integer/clc_mad_sat.cl
56
../generic/integer/clc_mul24.cl
67
../generic/integer/clc_mul_hi.cl
78
../generic/integer/clc_popcount.cl

libclc/clc/lib/generic/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ integer/clc_add_sat.cl
88
integer/clc_clz.cl
99
integer/clc_hadd.cl
1010
integer/clc_mad24.cl
11+
integer/clc_mad_sat.cl
1112
integer/clc_mul24.cl
1213
integer/clc_mul_hi.cl
1314
integer/clc_popcount.cl
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#include <clc/clcmacro.h>
2+
#include <clc/integer/clc_add_sat.h>
3+
#include <clc/integer/clc_mad24.h>
4+
#include <clc/integer/clc_mul_hi.h>
5+
#include <clc/integer/clc_upsample.h>
6+
#include <clc/integer/definitions.h>
7+
#include <clc/internal/clc.h>
8+
#include <clc/relational/clc_select.h>
9+
#include <clc/shared/clc_clamp.h>
10+
11+
// Shorthand for __builtin_convertvector(X, TY). The vector definitions in this
// file use it to widen operands before the multiply-add and to narrow the
// clamped result back to the return type.
#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)
12+
13+
// Scalar mad_sat for char/uchar/short/ushort.
//
// TYPE is the narrow scalar type, UP_TYPE the wider type the multiply-add is
// evaluated in, and LIT_PREFIX selects the <LIT_PREFIX>_MIN / <LIT_PREFIX>_MAX
// limits the wide result is clamped to before it is returned as TYPE.
// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs
#define __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \
  _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
    const UP_TYPE wide_mad = \
        (UP_TYPE)__clc_mad24((UP_TYPE)x, (UP_TYPE)y, (UP_TYPE)z); \
    const UP_TYPE lower_bound = (UP_TYPE)LIT_PREFIX##_MIN; \
    const UP_TYPE upper_bound = (UP_TYPE)LIT_PREFIX##_MAX; \
    return __clc_clamp(wide_mad, lower_bound, upper_bound); \
  }
21+
22+
// Vector mad_sat for char/uchar/short/ushort vectors.
//
// Each operand is converted element-wise to UP_TYPE, the multiply-add and the
// clamp to [<LIT_PREFIX>_MIN, <LIT_PREFIX>_MAX] are done on whole vectors, and
// the clamped vector is converted back to TYPE.
#define __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE, UP_TYPE, LIT_PREFIX) \
  _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
    const UP_TYPE wide_x = __CLC_CONVERT_TY(x, UP_TYPE); \
    const UP_TYPE wide_y = __CLC_CONVERT_TY(y, UP_TYPE); \
    const UP_TYPE wide_z = __CLC_CONVERT_TY(z, UP_TYPE); \
    const UP_TYPE wide_mad = __clc_mad24(wide_x, wide_y, wide_z); \
    const UP_TYPE saturated = __clc_clamp( \
        wide_mad, (UP_TYPE)LIT_PREFIX##_MIN, (UP_TYPE)LIT_PREFIX##_MAX); \
    return __CLC_CONVERT_TY(saturated, TYPE); \
  }
31+
32+
// Emits the scalar definition for TYPE followed by the vector definitions for
// the 2-, 3-, 4-, 8- and 16-element vectors of TYPE, each paired with the
// UP_TYPE vector of the same width.
#define __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(TYPE, UP_TYPE, LIT_PREFIX) \
33+
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \
34+
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##2, UP_TYPE##2, LIT_PREFIX) \
35+
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##3, UP_TYPE##3, LIT_PREFIX) \
36+
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##4, UP_TYPE##4, LIT_PREFIX) \
37+
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##8, UP_TYPE##8, LIT_PREFIX) \
38+
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##16, UP_TYPE##16, LIT_PREFIX)
39+
40+
// Signed narrow types are evaluated in int, unsigned narrow types in uint.
__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(char, int, CHAR)
41+
__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(uchar, uint, UCHAR)
42+
__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(short, int, SHRT)
43+
__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(ushort, uint, USHRT)
44+
45+
// mad_sat for uint/ulong (scalar and vector).
//
// If the high half of x * y is non-zero the product alone already exceeds the
// type, so the result is <ULIT_PREFIX>_MAX; otherwise the wrapped product is
// exact and a saturating add of z finishes the job. STYPE is the signed type
// of the same shape, used to hold the select condition.
#define __CLC_DEFINE_UINTLONG_MAD_SAT(UTYPE, STYPE, ULIT_PREFIX) \
  _CLC_OVERLOAD _CLC_DEF UTYPE __clc_mad_sat(UTYPE x, UTYPE y, UTYPE z) { \
    const UTYPE sat_sum = __clc_add_sat(x * y, z); \
    const UTYPE type_max = (UTYPE)ULIT_PREFIX##_MAX; \
    const STYPE product_overflowed = __clc_mul_hi(x, y) != (UTYPE)0; \
    return __clc_select(sat_sum, type_max, product_overflowed); \
  }
52+
53+
// Emits the uint/ulong-style definition for the scalar type UTY and for its
// 2-, 3-, 4-, 8- and 16-element vectors. STY is the matching signed type that
// the definition uses for its select condition.
#define __CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(UTY, STY, ULIT_PREFIX) \
54+
__CLC_DEFINE_UINTLONG_MAD_SAT(UTY, STY, ULIT_PREFIX) \
55+
__CLC_DEFINE_UINTLONG_MAD_SAT(UTY##2, STY##2, ULIT_PREFIX) \
56+
__CLC_DEFINE_UINTLONG_MAD_SAT(UTY##3, STY##3, ULIT_PREFIX) \
57+
__CLC_DEFINE_UINTLONG_MAD_SAT(UTY##4, STY##4, ULIT_PREFIX) \
58+
__CLC_DEFINE_UINTLONG_MAD_SAT(UTY##8, STY##8, ULIT_PREFIX) \
59+
__CLC_DEFINE_UINTLONG_MAD_SAT(UTY##16, STY##16, ULIT_PREFIX)
60+
61+
// Instantiate for the 32-bit and 64-bit unsigned types.
__CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(uint, int, UINT)
62+
__CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(ulong, long, ULONG)
63+
64+
// mad_sat for int vectors.
//
// The full product is rebuilt in SLONGTY from its high half (__clc_mul_hi)
// and its low half (the wrapped x * y reinterpreted as UINTTY), z is added in
// the wide type, and the sum is clamped to [INT_MIN, INT_MAX] before being
// converted back to INTTY.
#define __CLC_DEFINE_SINT_MAD_SAT(INTTY, UINTTY, SLONGTY) \
  _CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \
    const INTTY prod_hi = __clc_mul_hi(x, y); \
    const UINTTY prod_lo = __clc_as_##UINTTY(x * y); \
    const SLONGTY wide_prod = __clc_upsample(prod_hi, prod_lo); \
    const SLONGTY wide_sum = wide_prod + __CLC_CONVERT_TY(z, SLONGTY); \
    const SLONGTY saturated = \
        __clc_clamp(wide_sum, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \
    return __CLC_CONVERT_TY(saturated, INTTY); \
  }
74+
75+
// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs
76+
#define __CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(INTTY, UINTTY, SLONGTY) \
77+
_CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \
78+
INTTY mhi = __clc_mul_hi(x, y); \
79+
UINTTY mlo = __clc_as_##UINTTY(x * y); \
80+
SLONGTY m = __clc_upsample(mhi, mlo); \
81+
m += z; \
82+
return __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \
83+
} \
84+
__CLC_DEFINE_SINT_MAD_SAT(INTTY##2, UINTTY##2, SLONGTY##2) \
85+
__CLC_DEFINE_SINT_MAD_SAT(INTTY##3, UINTTY##3, SLONGTY##3) \
86+
__CLC_DEFINE_SINT_MAD_SAT(INTTY##4, UINTTY##4, SLONGTY##4) \
87+
__CLC_DEFINE_SINT_MAD_SAT(INTTY##8, UINTTY##8, SLONGTY##8) \
88+
__CLC_DEFINE_SINT_MAD_SAT(INTTY##16, UINTTY##16, SLONGTY##16)
89+
90+
__CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(int, uint, long)
91+
92+
// Macro for defining mad_sat variants for long
//
// x * y + z is evaluated exactly as a 128-bit two's-complement value held in a
// signed high half and an unsigned low half, and is then saturated to
// [LONG_MIN, LONG_MAX]. This replaces a flag-based formulation that
// mis-saturated several inputs, for example:
//   mad_sat(1, 1, LONG_MAX)   wrapped to LONG_MIN instead of LONG_MAX
//   mad_sat(0, -1, z)         returned LONG_MIN instead of z
//   mad_sat(-2, 1L << 62, 5)  returned LONG_MIN instead of LONG_MIN + 5
//
// The same body serves scalars and vectors: comparison results (1 for
// scalars, all-ones for vector lanes) are only ever consumed through
// __clc_select, never used arithmetically.
#define __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY, ULONGTY) \
  _CLC_OVERLOAD _CLC_DEF SLONGTY __clc_mad_sat(SLONGTY x, SLONGTY y, \
                                               SLONGTY z) { \
    /* 128-bit product: signed high half and unsigned low half. The low */ \
    /* half is multiplied as unsigned so that it wraps instead of */ \
    /* overflowing a signed type. */ \
    SLONGTY hi = __clc_mul_hi(x, y); \
    ULONGTY ulo = __clc_as_##ULONGTY(x) * __clc_as_##ULONGTY(y); \
    /* Add the sign-extended z. The low halves wrap; a wrap is detected by */ \
    /* the sum being smaller than an addend and is carried into the high */ \
    /* half. */ \
    ULONGTY sum_ulo = ulo + __clc_as_##ULONGTY(z); \
    SLONGTY carry = \
        __clc_select((SLONGTY)0, (SLONGTY)1, (SLONGTY)(sum_ulo < ulo)); \
    /* z >> 63 is the high half of sign-extended z (0 or -1). |hi| is at */ \
    /* most 2^62, so this addition cannot overflow. */ \
    SLONGTY sum_hi = hi + (z >> (SLONGTY)63) + carry; \
    SLONGTY sum_lo = __clc_as_##SLONGTY(sum_ulo); \
    /* The sum fits in 64 bits iff the high half is exactly the sign */ \
    /* extension of the low half. */ \
    SLONGTY out_of_range = sum_hi != (sum_lo >> (SLONGTY)63); \
    /* When it does not fit, the sign of the 128-bit sum picks the limit. */ \
    SLONGTY limit = __clc_select((SLONGTY)LONG_MAX, (SLONGTY)LONG_MIN, \
                                 (SLONGTY)(sum_hi < (SLONGTY)0)); \
    return __clc_select(sum_lo, limit, out_of_range); \
  }
110+
111+
// Emits the long definition for the scalar type SLONGTY and for its 2-, 3-,
// 4-, 8- and 16-element vectors, each paired with the ULONGTY of the same
// width.
#define __CLC_DEFINE_SLONG_MAD_SAT_ALL_TYS(SLONGTY, ULONGTY) \
112+
__CLC_DEFINE_SLONG_MAD_SAT(SLONGTY, ULONGTY) \
113+
__CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##2, ULONGTY##2) \
114+
__CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##3, ULONGTY##3) \
115+
__CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##4, ULONGTY##4) \
116+
__CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##8, ULONGTY##8) \
117+
__CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##16, ULONGTY##16)
118+
119+
// Instantiate for long, with ulong as the unsigned counterpart.
__CLC_DEFINE_SLONG_MAD_SAT_ALL_TYS(long, ulong)

libclc/clc/lib/spirv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
../generic/integer/clc_clz.cl
77
../generic/integer/clc_hadd.cl
88
../generic/integer/clc_mad24.cl
9+
../generic/integer/clc_mad_sat.cl
910
../generic/integer/clc_mul24.cl
1011
../generic/integer/clc_mul_hi.cl
1112
../generic/integer/clc_popcount.cl

libclc/generic/lib/integer/mad_sat.cl

Lines changed: 4 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,7 @@
11
#include <clc/clc.h>
2-
#include <clc/clcmacro.h>
2+
#include <clc/integer/clc_mad_sat.h>
33

4-
_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
5-
return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX);
6-
}
4+
#define FUNCTION mad_sat
5+
#define __CLC_BODY <clc/shared/ternary_def.inc>
76

8-
_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) {
9-
return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX);
10-
}
11-
12-
_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) {
13-
return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX);
14-
}
15-
16-
_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) {
17-
return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX);
18-
}
19-
20-
_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) {
21-
int mhi = mul_hi(x, y);
22-
uint mlo = x * y;
23-
long m = upsample(mhi, mlo);
24-
m += z;
25-
if (m > INT_MAX)
26-
return INT_MAX;
27-
if (m < INT_MIN)
28-
return INT_MIN;
29-
return m;
30-
}
31-
32-
_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) {
33-
if (mul_hi(x, y) != 0)
34-
return UINT_MAX;
35-
return add_sat(x * y, z);
36-
}
37-
38-
_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) {
39-
long hi = mul_hi(x, y);
40-
ulong ulo = x * y;
41-
long slo = x * y;
42-
/* Big overflow of more than 2 bits, add can't fix this */
43-
if (((x < 0) == (y < 0)) && hi != 0)
44-
return LONG_MAX;
45-
/* Low overflow in mul and z not neg enough to correct it */
46-
if (hi == 0 && ulo >= LONG_MAX && (z > 0 || (ulo + z) > LONG_MAX))
47-
return LONG_MAX;
48-
/* Big overflow of more than 2 bits, add can't fix this */
49-
if (((x < 0) != (y < 0)) && hi != -1)
50-
return LONG_MIN;
51-
/* Low overflow in mul and z not pos enough to correct it */
52-
if (hi == -1 && ulo <= ((ulong)LONG_MAX + 1UL) && (z < 0 || z < (LONG_MAX - ulo)))
53-
return LONG_MIN;
54-
/* We have checked all conditions, any overflow in addition returns
55-
* the correct value */
56-
return ulo + z;
57-
}
58-
59-
_CLC_OVERLOAD _CLC_DEF ulong mad_sat(ulong x, ulong y, ulong z) {
60-
if (mul_hi(x, y) != 0)
61-
return ULONG_MAX;
62-
return add_sat(x * y, z);
63-
}
64-
65-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, mad_sat, char, char, char)
66-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, mad_sat, uchar, uchar, uchar)
67-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, mad_sat, short, short, short)
68-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, mad_sat, ushort, ushort, ushort)
69-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, mad_sat, int, int, int)
70-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, mad_sat, uint, uint, uint)
71-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, mad_sat, long, long, long)
72-
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, mad_sat, ulong, ulong, ulong)
7+
#include <clc/integer/gentype.inc>

0 commit comments

Comments
 (0)