From f8df8a154eaa1b1b3bc38068f93d273ad95cb9c5 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Mon, 1 Sep 2014 10:13:04 +0800 Subject: [PATCH] Add the integer module into libocl as template Signed-off-by: Junyan He Reviewed-by: Zhigang Gong --- backend/src/libocl/script/ocl_integer.def | 31 +++ backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 351 ++++++++++++++++++++++++++++ backend/src/libocl/tmpl/ocl_integer.tmpl.h | 159 +++++++++++++ 3 files changed, 541 insertions(+) create mode 100644 backend/src/libocl/script/ocl_integer.def create mode 100644 backend/src/libocl/tmpl/ocl_integer.tmpl.cl create mode 100644 backend/src/libocl/tmpl/ocl_integer.tmpl.h diff --git a/backend/src/libocl/script/ocl_integer.def b/backend/src/libocl/script/ocl_integer.def new file mode 100644 index 0000000..ec9177a --- /dev/null +++ b/backend/src/libocl/script/ocl_integer.def @@ -0,0 +1,31 @@ +##integer +ugentype abs (gentype x) +ugentype abs_diff (gentype x, gentype y) +gentype add_sat (gentype x, gentype y) +gentype hadd (gentype x, gentype y) +gentype rhadd (gentype x, gentype y) +gentype clamp (gentype x, gentype minval, gentype maxval) +gentype clamp (gentype x, sgentype minval, sgentype maxval) +gentype clz (gentype x) +gentype mad_hi (gentype a, gentype b, gentype c) +gentype mad_sat (gentype a, gentype b, gentype c) +gentype max (gentype x, gentype y) +gentype max (gentype x, sgentype y) +gentype min (gentype x, gentype y) +gentype min (gentype x, sgentype y) +gentype mul_hi (gentype x, gentype y) +gentype rotate (gentype v, gentype i) +gentype sub_sat (gentype x, gentype y) +shortn upsample (charn hi, ucharn lo) +ushortn upsample (ucharn hi, ucharn lo) +intn upsample (shortn hi, ushortn lo) +uintn upsample (ushortn hi, ushortn lo) +longn upsample (intn hi, uintn lo) +ulongn upsample (uintn hi, uintn lo) + +# XXX not implemented +#gentype popcount (gentype x) + +##fast_integer +gentype mad24 (gentype x, gentype y, gentype z) +gentype mul24 (gentype x, gentype y) diff --git 
a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
new file mode 100644
index 0000000..20ba9df
--- /dev/null
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
@@ -0,0 +1,351 @@
+#include "ocl_integer.h"
+/* Definitions of the integer built-ins declared in ocl_integer.h. */
+PURE CONST uint __gen_ocl_fbh(uint);
+PURE CONST uint __gen_ocl_fbl(uint);
+/* clz(x): number of leading zero bits; the full bit width of the type when x == 0. */
+OVERLOADABLE char clz(char x) {
+  if (x < 0) /* sign bit set: no leading zero */
+    return 0;
+  if (x == 0)
+    return 8;
+  return __gen_ocl_fbh(x) - 24; /* presumably fbh counts over 32 bits, hence the -24 */
+}
+
+OVERLOADABLE uchar clz(uchar x) {
+  if (x == 0)
+    return 8;
+  return __gen_ocl_fbh(x) - 24;
+}
+
+OVERLOADABLE short clz(short x) {
+  if (x < 0)
+    return 0;
+  if (x == 0)
+    return 16;
+  return __gen_ocl_fbh(x) - 16;
+}
+
+OVERLOADABLE ushort clz(ushort x) {
+  if (x == 0)
+    return 16;
+  return __gen_ocl_fbh(x) - 16;
+}
+
+OVERLOADABLE int clz(int x) {
+  if (x < 0)
+    return 0;
+  if (x == 0)
+    return 32;
+  return __gen_ocl_fbh(x);
+}
+
+OVERLOADABLE uint clz(uint x) {
+  if (x == 0)
+    return 32;
+  return __gen_ocl_fbh(x);
+}
+/* 64-bit clz: the value is split into two 32-bit words; i[1] is treated as the high word. */
+OVERLOADABLE long clz(long x) {
+  union { int i[2]; long x; } u;
+  u.x = x;
+  if (u.i[1] & 0x80000000u) /* negative value: no leading zero */
+    return 0;
+  if (u.i[1] == 0 && u.i[0] == 0)
+    return 64;
+  uint v = clz(u.i[1]);
+  if(v == 32) /* high word is all zero: continue counting in the low word */
+    v += clz(u.i[0]);
+  return v;
+}
+
+OVERLOADABLE ulong clz(ulong x) {
+  if (x == 0)
+    return 64;
+  union { uint i[2]; ulong x; } u;
+  u.x = x;
+  uint v = clz(u.i[1]);
+  if(v == 32) /* high word is all zero: continue counting in the low word */
+    v += clz(u.i[0]);
+  return v;
+}
+
+
+// sat: add_sat/sub_sat forward to the ocl_[su]add_sat/ocl_[su]sub_sat prototypes declared below
+#define SDEF(TYPE) \
+OVERLOADABLE TYPE ocl_sadd_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE ocl_ssub_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); } \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); }
+SDEF(char);
+SDEF(short);
+#undef SDEF
+OVERLOADABLE int ocl_sadd_sat(int x, int y);
+OVERLOADABLE int add_sat(int x, int y) { return ocl_sadd_sat(x, y); }
+OVERLOADABLE int ocl_ssub_sat(int x, int y);
+OVERLOADABLE int sub_sat(int x, int y) { /* y == INT_MIN bypasses ocl_ssub_sat: x - INT_MIN is x + 2^31, exact for x < 0 (clear the sign bit), INT_MAX otherwise */
+  return (y == 0x80000000u) ? ((x < 0) ? (x & 0x7FFFFFFF) : 0x7FFFFFFF) : ocl_ssub_sat(x, y);
+}
+OVERLOADABLE long ocl_sadd_sat(long x, long y);
+OVERLOADABLE long add_sat(long x, long y) {
+  union {long l; uint i[2];} ux, uy;
+  ux.l = x;
+  uy.l = y;
+  if((ux.i[1] ^ uy.i[1]) & 0x80000000u) /* sign bits differ: the plain sum cannot overflow */
+    return x + y;
+  return ocl_sadd_sat(x, y);
+}
+OVERLOADABLE long ocl_ssub_sat(long x, long y);
+OVERLOADABLE long sub_sat(long x, long y) {
+  union {long l; uint i[2];} ux, uy;
+  ux.l = x;
+  uy.l = y;
+  if((ux.i[1] ^ uy.i[1]) & 0x80000000u) /* sign bits differ: the difference may overflow, so saturate */
+    return ocl_ssub_sat(x, y);
+  return x - y; /* same sign: the plain difference cannot overflow */
+}
+#define UDEF(TYPE) \
+OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_uadd_sat(x, y); } \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_usub_sat(x, y); }
+UDEF(uchar);
+UDEF(ushort);
+UDEF(uint);
+UDEF(ulong);
+#undef UDEF
+
+/* mul_hi(x, y): high half of the product; 8/16-bit types multiply in a wider type and shift, 32/64-bit types use __gen_ocl_mul_hi. */
+OVERLOADABLE int __gen_ocl_mul_hi(int x, int y);
+OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y);
+OVERLOADABLE long __gen_ocl_mul_hi(long x, long y);
+OVERLOADABLE ulong __gen_ocl_mul_hi(ulong x, ulong y);
+OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; }
+OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; }
+OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; }
+OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return ((uint)x * (uint)y) >> 16; } /* multiply as uint: 0xFFFF * 0xFFFF does not fit in a signed int */
+OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); }
+OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); }
+OVERLOADABLE long mul_hi(long x, long y) {
+  return __gen_ocl_mul_hi(x, y);
+}
+OVERLOADABLE ulong mul_hi(ulong x, ulong y) {
+  return __gen_ocl_mul_hi(x, y);
+}
+/* mad_hi(a, b, c) = mul_hi(a, b) + c */
+#define DEF(type) OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; }
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(long)
+DEF(ulong)
+#undef DEF
+/* mul24: product of the low 24 bits of each operand (sign-extended by << 8 >> 8 for int, masked for uint). */
+OVERLOADABLE int mul24(int a, int b) { return ((a << 8) >> 8) * ((b << 8) >> 8); }
+OVERLOADABLE uint mul24(uint a, uint b) { return (a & 0xFFFFFF) * (b & 0xFFFFFF); }
+/* mad24(a, b, c) = mul24(a, b) + c */
+OVERLOADABLE int mad24(int a, int b, int c) { return mul24(a, b) + c; }
+OVERLOADABLE uint mad24(uint a, uint b, uint c) { return mul24(a, b) + c; }
+/* mad_sat(a, b, c): a * b + c evaluated in a wider type, then clamped to the range of the result type. */
+OVERLOADABLE char mad_sat(char a, char b, char c) {
+  int x = (int)a * (int)b + (int)c;
+  if (x > 127)
+    x = 127;
+  if (x < -128)
+    x = -128;
+  return x;
+}
+
+OVERLOADABLE uchar mad_sat(uchar a, uchar b, uchar c) {
+  uint x = (uint)a * (uint)b + (uint)c;
+  if (x > 255)
+    x = 255;
+  return x;
+}
+
+OVERLOADABLE short mad_sat(short a, short b, short c) {
+  int x = (int)a * (int)b + (int)c;
+  if (x > 32767)
+    x = 32767;
+  if (x < -32768)
+    x = -32768;
+  return x;
+}
+
+OVERLOADABLE ushort mad_sat(ushort a, ushort b, ushort c) {
+  uint x = (uint)a * (uint)b + (uint)c;
+  if (x > 65535)
+    x = 65535;
+  return x;
+}
+
+OVERLOADABLE int mad_sat(int a, int b, int c) {
+  long x = (long)a * (long)b + (long)c;
+  if (x > 0x7FFFFFFF)
+    x = 0x7FFFFFFF;
+  else if (x < -0x7FFFFFFF-1)
+    x = -0x7FFFFFFF-1;
+  return (int)x;
+}
+
+OVERLOADABLE uint mad_sat(uint a, uint b, uint c) {
+  ulong x = (ulong)a * (ulong)b + (ulong)c;
+  if (x > 0xFFFFFFFFu)
+    x = 0xFFFFFFFFu;
+  return (uint)x;
+}
+/* No wider type exists for 64-bit operands, so these forward to __gen_ocl_mad_sat. */
+OVERLOADABLE long __gen_ocl_mad_sat(long a, long b, long c);
+OVERLOADABLE ulong __gen_ocl_mad_sat(ulong a, ulong b, ulong c);
+
+OVERLOADABLE long mad_sat(long a, long b, long c) {
+  return __gen_ocl_mad_sat(a, b, c);
+}
+
+OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c) {
+  return __gen_ocl_mad_sat(a, b, c);
+}
+/* __rotate_left(x, y): rotate x left by y bits. NOTE(review): y == 0 shifts right by the full width; this relies on OpenCL C reducing shift counts modulo the operand width. */
+OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
+OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); }
+OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); }
+OVERLOADABLE short __rotate_left(short x, short y) { return __rotate_left((ushort)x, (ushort)y); }
+OVERLOADABLE uint __rotate_left(uint x, uint y) { return (x << y) | (x >> (32 - y)); }
+OVERLOADABLE int __rotate_left(int x, int y) { return __rotate_left((uint)x, (uint)y); }
+OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { return (x << y) | (x >> (64 - y)); }
+OVERLOADABLE long __rotate_left(long x, long y) { return __rotate_left((ulong)x, (ulong)y); }
+#define DEF(type, m) OVERLOADABLE type rotate(type x, type y) { return __rotate_left(x, (type)(y & m)); }
+DEF(char, 7)
+DEF(uchar, 7)
+DEF(short, 15)
+DEF(ushort, 15)
+DEF(int, 31)
+DEF(uint, 31)
+DEF(long, 63)
+DEF(ulong, 63)
+#undef DEF
+/* upsample(hi, lo): widen both operands to the result width and forward to __gen_ocl_upsample. */
+OVERLOADABLE short __gen_ocl_upsample(short hi, short lo);
+OVERLOADABLE int __gen_ocl_upsample(int hi, int lo);
+OVERLOADABLE long __gen_ocl_upsample(long hi, long lo);
+OVERLOADABLE short upsample(char hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+OVERLOADABLE ushort upsample(uchar hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+OVERLOADABLE int upsample(short hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+OVERLOADABLE uint upsample(ushort hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+OVERLOADABLE long upsample(int hi, uint lo) {
+  return __gen_ocl_upsample((long)hi, (long)lo);
+}
+OVERLOADABLE ulong upsample(uint hi, uint lo) {
+  return __gen_ocl_upsample((long)hi, (long)lo);
+}
+/* hadd(x, y) = (x + y) >> 1 and rhadd(x, y) = (x + y + 1) >> 1; 8/16-bit operands are promoted to int, so the sum cannot overflow. */
+OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y);
+OVERLOADABLE uint __gen_ocl_rhadd(uint x, uint y);
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
+#define DEF(type) OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
+DEC
+#undef DEF
+#define DEF(type) OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >> 1; }
+DEC
+#undef DEF
+#undef DEC
+OVERLOADABLE int hadd(int x, int y) {
+  return ((x ^ y) < 0) ? /* sign bits differ (zero counts as non-negative): the signed sum cannot overflow */
+         ((x + y) >> 1) :
+         __gen_ocl_hadd((uint)x, (uint)y); /* same sign: the unsigned average has the same bit pattern */
+}
+OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
+OVERLOADABLE int rhadd(int x, int y) {
+  return ((x ^ y) < 0) ? /* sign bits differ: x + y + 1 cannot overflow */
+         ((x + y + 1) >> 1) :
+         __gen_ocl_rhadd((uint)x, (uint)y);
+}
+OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
+OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y);
+OVERLOADABLE ulong __gen_ocl_rhadd(ulong x, ulong y);
+OVERLOADABLE long hadd(long x, long y) {
+  return ((x ^ y) < 0) ? /* sign bits differ: the signed sum cannot overflow */
+         ((x + y) >> 1) :
+         __gen_ocl_hadd((ulong)x, (ulong)y);
+}
+OVERLOADABLE ulong hadd(ulong x, ulong y) {
+  return __gen_ocl_hadd(x, y);
+}
+OVERLOADABLE long rhadd(long x, long y) {
+  return ((x ^ y) < 0) ? /* sign bits differ: x + y + 1 cannot overflow */
+         ((x + y + 1) >> 1) :
+         __gen_ocl_rhadd((ulong)x, (ulong)y);
+}
+OVERLOADABLE ulong rhadd(ulong x, ulong y) {
+  return __gen_ocl_rhadd(x, y);
+}
+/* abs(x): magnitude of x, returned in the unsigned type of the same width. */
+int __gen_ocl_abs(int x);
+#define DEC(TYPE) OVERLOADABLE u##TYPE abs(TYPE x) { return (u##TYPE) __gen_ocl_abs(x); }
+DEC(int)
+DEC(short)
+DEC(char)
+#undef DEC
+OVERLOADABLE ulong abs(long x) { return x < 0 ? -(ulong)x : (ulong)x; } /* negate as ulong so the most negative value does not overflow */
+/* For unsigned types, do nothing. */
+#define DEC(TYPE) OVERLOADABLE TYPE abs(TYPE x) { return x; }
+DEC(uint)
+DEC(ushort)
+DEC(uchar)
+DEC(ulong)
+#undef DEC
+
+/* abs_diff for char and short types. */
+/* Operands are promoted to int, so the subtraction has no modulo overflow. */
+#define DEC(TYPE, UTYPE) OVERLOADABLE UTYPE abs_diff(TYPE x, TYPE y) \
+  { return (UTYPE) (abs((int)x - (int)y)); }
+DEC(char, uchar)
+DEC(uchar, uchar)
+DEC(short, ushort)
+DEC(ushort, ushort)
+#undef DEC
+
+OVERLOADABLE uint abs_diff (uint x, uint y) {
+  /* Subtract the smaller operand from the larger one so the difference never wraps. */
+  return y > x ? (y - x) : (x - y);
+}
+
+OVERLOADABLE uint abs_diff (int x, int y) {
+  /* Equal sign bits: x - y cannot overflow. Otherwise |x - y| == |x| + |y|, summed as unsigned. */
+  if ((x ^ y) >= 0)
+    return abs(x - y);
+
+  return (abs(x) + abs(y));
+}
+
+OVERLOADABLE ulong abs_diff (long x, long y) {
+  if ((x ^ y) >= 0) /* equal sign bits: x - y cannot overflow */
+    return abs(x - y);
+  return abs(x) + abs(y);
+}
+OVERLOADABLE ulong abs_diff (ulong x, ulong y) {
+  return y > x ? (y - x) : (x - y);
+}
+
+/* min/max by comparison; clamp(v, l, u) is max(min(v, u), l). */
+#define DECL_MIN_MAX_CLAMP(TYPE) \
+OVERLOADABLE TYPE max(TYPE a, TYPE b) { \
+  return a > b ? a : b; \
+} \
+OVERLOADABLE TYPE min(TYPE a, TYPE b) { \
+  return a < b ? a : b; \
+} \
+OVERLOADABLE TYPE clamp(TYPE v, TYPE l, TYPE u) { \
+  return max(min(v, u), l); \
+}
+DECL_MIN_MAX_CLAMP(int)
+DECL_MIN_MAX_CLAMP(short)
+DECL_MIN_MAX_CLAMP(char)
+DECL_MIN_MAX_CLAMP(uint)
+DECL_MIN_MAX_CLAMP(unsigned short)
+DECL_MIN_MAX_CLAMP(unsigned char)
+DECL_MIN_MAX_CLAMP(long)
+DECL_MIN_MAX_CLAMP(ulong)
+#undef DECL_MIN_MAX_CLAMP
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
new file mode 100644
index 0000000..c0e57f7
--- /dev/null
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
@@ -0,0 +1,159 @@
+#ifndef __OCL_INTEGER_H__
+#define __OCL_INTEGER_H__
+
+#include "ocl_types.h"
+/* Integer limit macros. */
+#define CHAR_BIT 8
+#define CHAR_MAX SCHAR_MAX
+#define CHAR_MIN SCHAR_MIN
+#define INT_MAX 2147483647
+#define INT_MIN (-2147483647 - 1)
+#define LONG_MAX 0x7fffffffffffffffL
+#define LONG_MIN (-0x7fffffffffffffffL - 1)
+#define SCHAR_MAX 127
+#define SCHAR_MIN (-127 - 1)
+#define SHRT_MAX 32767
+#define SHRT_MIN (-32767 - 1)
+#define UCHAR_MAX 255
+#define USHRT_MAX 65535
+#define UINT_MAX 0xffffffff
+#define ULONG_MAX 0xffffffffffffffffUL
+/* clz(x): number of leading zero bits of x. */
+OVERLOADABLE char clz(char x);
+OVERLOADABLE uchar clz(uchar x);
+OVERLOADABLE short clz(short x);
+OVERLOADABLE ushort clz(ushort x);
+OVERLOADABLE int clz(int x);
+OVERLOADABLE uint clz(uint x);
+OVERLOADABLE long clz(long x);
+OVERLOADABLE ulong clz(ulong x);
+/* mul_hi(x, y): high half of the full-width product x * y. */
+OVERLOADABLE char mul_hi(char x, char y);
+OVERLOADABLE uchar mul_hi(uchar x, uchar y);
+OVERLOADABLE short mul_hi(short x, short y);
+OVERLOADABLE ushort mul_hi(ushort x, ushort y);
+OVERLOADABLE int mul_hi(int x, int y);
+OVERLOADABLE uint mul_hi(uint x, uint y);
+OVERLOADABLE long mul_hi(long x, long y);
+OVERLOADABLE ulong mul_hi(ulong x, ulong y);
+/* add_sat / sub_sat: saturating addition and subtraction, signed (SDEF) and unsigned (UDEF). */
+#define SDEF(TYPE) \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y);
+SDEF(char);
+SDEF(short);
+SDEF(int);
+SDEF(long);
+#undef SDEF
+#define UDEF(TYPE) \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y);
+UDEF(uchar);
+UDEF(ushort);
+UDEF(uint);
+UDEF(ulong);
+#undef UDEF
+/* mad_hi(a, b, c) = mul_hi(a, b) + c. */
+#define DEF(type) OVERLOADABLE type mad_hi(type a, type b, type c);
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(long)
+DEF(ulong)
+#undef DEF
+/* mul24 multiplies the low 24 bits of its operands; mad24(a, b, c) = mul24(a, b) + c. */
+OVERLOADABLE int mul24(int a, int b);
+OVERLOADABLE uint mul24(uint a, uint b);
+
+OVERLOADABLE int mad24(int a, int b, int c);
+OVERLOADABLE uint mad24(uint a, uint b, uint c);
+/* mad_sat(a, b, c): a * b + c clamped to the range of the result type. */
+OVERLOADABLE char mad_sat(char a, char b, char c);
+OVERLOADABLE uchar mad_sat(uchar a, uchar b, uchar c);
+OVERLOADABLE short mad_sat(short a, short b, short c);
+OVERLOADABLE ushort mad_sat(ushort a, ushort b, ushort c);
+OVERLOADABLE int mad_sat(int a, int b, int c);
+OVERLOADABLE uint mad_sat(uint a, uint b, uint c);
+OVERLOADABLE long mad_sat(long a, long b, long c);
+OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c);
+/* rotate(x, y): rotate x left by y bits; the second DEF argument is unused in these prototypes. */
+#define DEF(type, m) OVERLOADABLE type rotate(type x, type y);
+DEF(char, 7)
+DEF(uchar, 7)
+DEF(short, 15)
+DEF(ushort, 15)
+DEF(int, 31)
+DEF(uint, 31)
+DEF(long, 63)
+DEF(ulong, 63)
+#undef DEF
+/* upsample(hi, lo): build a value of twice the operand width from hi and lo. */
+OVERLOADABLE short upsample(char hi, uchar lo);
+OVERLOADABLE ushort upsample(uchar hi, uchar lo);
+OVERLOADABLE int upsample(short hi, ushort lo);
+OVERLOADABLE uint upsample(ushort hi, ushort lo);
+OVERLOADABLE long upsample(int hi, uint lo);
+OVERLOADABLE ulong upsample(uint hi, uint lo);
+/* hadd(x, y) = (x + y) >> 1 and rhadd(x, y) = (x + y + 1) >> 1. */
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
+#define DEF(type) OVERLOADABLE type hadd(type x, type y);
+DEC
+#undef DEF
+#define DEF(type) OVERLOADABLE type rhadd(type x, type y);
+DEC
+#undef DEF
+#undef DEC
+OVERLOADABLE int hadd(int x, int y);
+OVERLOADABLE uint hadd(uint x, uint y);
+OVERLOADABLE int rhadd(int x, int y);
+OVERLOADABLE uint rhadd(uint x, uint y);
+OVERLOADABLE long hadd(long x, long y);
+OVERLOADABLE ulong hadd(ulong x, ulong y);
+OVERLOADABLE long rhadd(long x, long y);
+OVERLOADABLE ulong rhadd(ulong x, ulong y);
+/* abs(x): magnitude of x in the unsigned type of the same width. */
+#define DEC(TYPE) OVERLOADABLE u##TYPE abs(TYPE x);
+DEC(int)
+DEC(short)
+DEC(char)
+#undef DEC
+OVERLOADABLE ulong abs(long x);
+/* For unsigned types, do nothing. */
+#define DEC(TYPE) OVERLOADABLE TYPE abs(TYPE x);
+DEC(uint)
+DEC(ushort)
+DEC(uchar)
+DEC(ulong)
+#undef DEC
+
+/* abs_diff for char and short types. */
+/* Operands are promoted to int, so the subtraction has no modulo overflow. */
+#define DEC(TYPE, UTYPE) OVERLOADABLE UTYPE abs_diff(TYPE x, TYPE y);
+DEC(char, uchar)
+DEC(uchar, uchar)
+DEC(short, ushort)
+DEC(ushort, ushort)
+#undef DEC
+
+OVERLOADABLE uint abs_diff (uint x, uint y);
+OVERLOADABLE uint abs_diff (int x, int y);
+OVERLOADABLE ulong abs_diff (long x, long y);
+OVERLOADABLE ulong abs_diff (ulong x, ulong y);
+
+/* min, max and clamp for every integer type. */
+#define DECL_MIN_MAX_CLAMP(TYPE) \
+OVERLOADABLE TYPE max(TYPE a, TYPE b); \
+OVERLOADABLE TYPE min(TYPE a, TYPE b); \
+OVERLOADABLE TYPE clamp(TYPE v, TYPE l, TYPE u);
+DECL_MIN_MAX_CLAMP(int)
+DECL_MIN_MAX_CLAMP(short)
+DECL_MIN_MAX_CLAMP(char)
+DECL_MIN_MAX_CLAMP(uint)
+DECL_MIN_MAX_CLAMP(unsigned short)
+DECL_MIN_MAX_CLAMP(unsigned char)
+DECL_MIN_MAX_CLAMP(long)
+DECL_MIN_MAX_CLAMP(ulong)
+#undef DECL_MIN_MAX_CLAMP
-- 
2.7.4