|
| 1 | +#include <clc/clcmacro.h> |
| 2 | +#include <clc/integer/clc_add_sat.h> |
| 3 | +#include <clc/integer/clc_mad24.h> |
| 4 | +#include <clc/integer/clc_mul_hi.h> |
| 5 | +#include <clc/integer/clc_upsample.h> |
| 6 | +#include <clc/integer/definitions.h> |
| 7 | +#include <clc/internal/clc.h> |
| 8 | +#include <clc/relational/clc_select.h> |
| 9 | +#include <clc/shared/clc_clamp.h> |
| 10 | + |
// Converts the vector VEC to the vector type DST_TY by forwarding to the
// compiler's __builtin_convertvector.
#define __CLC_CONVERT_TY(VEC, DST_TY) __builtin_convertvector(VEC, DST_TY)
| 12 | + |
// Defines the scalar __clc_mad_sat overload for a narrow integer type
// (instantiated below for char/uchar/short/ushort).
//   TYPE       - operand and result type
//   UP_TYPE    - wider type in which x * y + z is evaluated
//   LIT_PREFIX - prefix of the <prefix>_MIN / <prefix>_MAX limit macros
// The multiply-add is computed in UP_TYPE with __clc_mad24, clamped to the
// limits of TYPE, and converted back to TYPE by the return statement.
// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs
#define __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \
  _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
    UP_TYPE wide_mad = __clc_mad24((UP_TYPE)x, (UP_TYPE)y, (UP_TYPE)z); \
    return __clc_clamp(wide_mad, (UP_TYPE)LIT_PREFIX##_MIN, \
                       (UP_TYPE)LIT_PREFIX##_MAX); \
  }
| 21 | + |
// Vector counterpart of __CLC_DEFINE_SIMPLE_MAD_SAT: the operands are widened
// element-wise to UP_TYPE, multiplied and accumulated with __clc_mad24,
// clamped to [LIT_PREFIX_MIN, LIT_PREFIX_MAX] and narrowed back to TYPE.
#define __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE, UP_TYPE, LIT_PREFIX) \
  _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
    UP_TYPE wide = __clc_mad24(__CLC_CONVERT_TY(x, UP_TYPE), \
                               __CLC_CONVERT_TY(y, UP_TYPE), \
                               __CLC_CONVERT_TY(z, UP_TYPE)); \
    wide = __clc_clamp(wide, (UP_TYPE)LIT_PREFIX##_MIN, \
                       (UP_TYPE)LIT_PREFIX##_MAX); \
    return __CLC_CONVERT_TY(wide, TYPE); \
  }
| 31 | + |
| 32 | +#define __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(TYPE, UP_TYPE, LIT_PREFIX) \ |
| 33 | + __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \ |
| 34 | + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##2, UP_TYPE##2, LIT_PREFIX) \ |
| 35 | + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##3, UP_TYPE##3, LIT_PREFIX) \ |
| 36 | + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##4, UP_TYPE##4, LIT_PREFIX) \ |
| 37 | + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##8, UP_TYPE##8, LIT_PREFIX) \ |
| 38 | + __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##16, UP_TYPE##16, LIT_PREFIX) |
| 39 | + |
| 40 | +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(char, int, CHAR) |
| 41 | +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(uchar, uint, UCHAR) |
| 42 | +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(short, int, SHRT) |
| 43 | +__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(ushort, uint, USHRT) |
| 44 | + |
// Defines __clc_mad_sat for an unsigned 32- or 64-bit type (scalar or vector).
//   UTYPE       - operand and result type
//   STYPE       - same-shaped signed type, used to hold the select condition
//   ULIT_PREFIX - prefix of the <prefix>_MAX limit macro
// If the high half of x * y is non-zero the product alone already exceeds the
// range of UTYPE, so the result is <prefix>_MAX; otherwise the low half is
// added to z with a saturating add.
#define __CLC_DEFINE_UINTLONG_MAD_SAT(UTYPE, STYPE, ULIT_PREFIX) \
  _CLC_OVERLOAD _CLC_DEF UTYPE __clc_mad_sat(UTYPE x, UTYPE y, UTYPE z) { \
    UTYPE low_mad = __clc_add_sat(x * y, z); \
    STYPE product_overflows = __clc_mul_hi(x, y) != (UTYPE)0; \
    return __clc_select(low_mad, (UTYPE)ULIT_PREFIX##_MAX, \
                        product_overflows); \
  }
| 52 | + |
| 53 | +#define __CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(UTY, STY, ULIT_PREFIX) \ |
| 54 | + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY, STY, ULIT_PREFIX) \ |
| 55 | + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##2, STY##2, ULIT_PREFIX) \ |
| 56 | + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##3, STY##3, ULIT_PREFIX) \ |
| 57 | + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##4, STY##4, ULIT_PREFIX) \ |
| 58 | + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##8, STY##8, ULIT_PREFIX) \ |
| 59 | + __CLC_DEFINE_UINTLONG_MAD_SAT(UTY##16, STY##16, ULIT_PREFIX) |
| 60 | + |
| 61 | +__CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(uint, int, UINT) |
| 62 | +__CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(ulong, long, ULONG) |
| 63 | + |
// Defines the vector __clc_mad_sat overloads for int.
//   INTTY   - signed 32-bit vector type of the operands and result
//   UINTTY  - unsigned vector type of the same shape
//   SLONGTY - signed 64-bit vector type of the same shape
// The 64-bit product is rebuilt from its high half (__clc_mul_hi) and low half
// (x * y reinterpreted as unsigned) with __clc_upsample, z is added in 64 bits,
// and the sum is clamped to [INT_MIN, INT_MAX] before narrowing.
#define __CLC_DEFINE_SINT_MAD_SAT(INTTY, UINTTY, SLONGTY) \
  _CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \
    INTTY prod_hi = __clc_mul_hi(x, y); \
    UINTTY prod_lo = __clc_as_##UINTTY(x * y); \
    SLONGTY wide = __clc_upsample(prod_hi, prod_lo); \
    wide = wide + __CLC_CONVERT_TY(z, SLONGTY); \
    wide = __clc_clamp(wide, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \
    return __CLC_CONVERT_TY(wide, INTTY); \
  }
| 74 | + |
| 75 | +// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs |
| 76 | +#define __CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(INTTY, UINTTY, SLONGTY) \ |
| 77 | + _CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \ |
| 78 | + INTTY mhi = __clc_mul_hi(x, y); \ |
| 79 | + UINTTY mlo = __clc_as_##UINTTY(x * y); \ |
| 80 | + SLONGTY m = __clc_upsample(mhi, mlo); \ |
| 81 | + m += z; \ |
| 82 | + return __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \ |
| 83 | + } \ |
| 84 | + __CLC_DEFINE_SINT_MAD_SAT(INTTY##2, UINTTY##2, SLONGTY##2) \ |
| 85 | + __CLC_DEFINE_SINT_MAD_SAT(INTTY##3, UINTTY##3, SLONGTY##3) \ |
| 86 | + __CLC_DEFINE_SINT_MAD_SAT(INTTY##4, UINTTY##4, SLONGTY##4) \ |
| 87 | + __CLC_DEFINE_SINT_MAD_SAT(INTTY##8, UINTTY##8, SLONGTY##8) \ |
| 88 | + __CLC_DEFINE_SINT_MAD_SAT(INTTY##16, UINTTY##16, SLONGTY##16) |
| 89 | + |
| 90 | +__CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(int, uint, long) |
| 91 | + |
// Defines __clc_mad_sat for long (scalar or vector).
//   SLONGTY - signed 64-bit operand and result type
//   ULONGTY - unsigned 64-bit type of the same shape
//
// x * y + z is evaluated exactly as a 128-bit two's-complement value held in
// (sum_hi : usum) and then narrowed with saturation:
//   * the product is (hi : ulo), with hi from __clc_mul_hi and ulo from an
//     unsigned (wrapping) multiply of the reinterpreted operands;
//   * z is sign-extended to 128 bits, i.e. its high half is z >> 63;
//   * the low halves are added modulo 2^64 and the carry is propagated into
//     the high half. |hi| is at most 2^62, so the high-half add cannot wrap.
// The exact sum is representable in SLONGTY iff its high half equals the sign
// extension of its low half. Otherwise the sign of the high half decides
// between LONG_MIN and LONG_MAX.
//
// The comparison results are passed through __clc_select so the same body
// works for scalars (true == 1) and vectors (true == -1).
#define __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY, ULONGTY) \
  _CLC_OVERLOAD _CLC_DEF SLONGTY __clc_mad_sat(SLONGTY x, SLONGTY y, \
                                               SLONGTY z) { \
    SLONGTY hi = __clc_mul_hi(x, y); \
    ULONGTY ulo = __clc_as_##ULONGTY(x) * __clc_as_##ULONGTY(y); \
    ULONGTY usum = ulo + __clc_as_##ULONGTY(z); \
    SLONGTY sum = __clc_as_##SLONGTY(usum); \
    /* Unsigned wrap-around of the low half means a carry into the high. */ \
    SLONGTY carry = \
        __clc_select((SLONGTY)0, (SLONGTY)1, (SLONGTY)(usum < ulo)); \
    SLONGTY sum_hi = hi + (z >> (SLONGTY)63) + carry; \
    SLONGTY saturated = __clc_select((SLONGTY)LONG_MAX, (SLONGTY)LONG_MIN, \
                                     (SLONGTY)(sum_hi < (SLONGTY)0)); \
    return __clc_select(saturated, sum, \
                        (SLONGTY)(sum_hi == (sum >> (SLONGTY)63))); \
  }
| 110 | + |
| 111 | +#define __CLC_DEFINE_SLONG_MAD_SAT_ALL_TYS(SLONGTY, ULONGTY) \ |
| 112 | + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY, ULONGTY) \ |
| 113 | + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##2, ULONGTY##2) \ |
| 114 | + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##3, ULONGTY##3) \ |
| 115 | + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##4, ULONGTY##4) \ |
| 116 | + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##8, ULONGTY##8) \ |
| 117 | + __CLC_DEFINE_SLONG_MAD_SAT(SLONGTY##16, ULONGTY##16) |
| 118 | + |
| 119 | +__CLC_DEFINE_SLONG_MAD_SAT_ALL_TYS(long, ulong) |
0 commit comments