--- /dev/null
+#include "ocl_integer.h"
+
+PURE CONST uint __gen_ocl_fbh(uint);
+PURE CONST uint __gen_ocl_fbl(uint);
+
+OVERLOADABLE char clz(char x) {
+ if (x < 0)
+ return 0;
+ if (x == 0)
+ return 8;
+ return __gen_ocl_fbh(x) - 24;
+}
+
+OVERLOADABLE uchar clz(uchar x) {
+ if (x == 0)
+ return 8;
+ return __gen_ocl_fbh(x) - 24;
+}
+
+OVERLOADABLE short clz(short x) {
+ if (x < 0)
+ return 0;
+ if (x == 0)
+ return 16;
+ return __gen_ocl_fbh(x) - 16;
+}
+
+OVERLOADABLE ushort clz(ushort x) {
+ if (x == 0)
+ return 16;
+ return __gen_ocl_fbh(x) - 16;
+}
+
+OVERLOADABLE int clz(int x) {
+ if (x < 0)
+ return 0;
+ if (x == 0)
+ return 32;
+ return __gen_ocl_fbh(x);
+}
+
+OVERLOADABLE uint clz(uint x) {
+ if (x == 0)
+ return 32;
+ return __gen_ocl_fbh(x);
+}
+
+OVERLOADABLE long clz(long x) {
+ union { int i[2]; long x; } u;
+ u.x = x;
+ if (u.i[1] & 0x80000000u)
+ return 0;
+ if (u.i[1] == 0 && u.i[0] == 0)
+ return 64;
+ uint v = clz(u.i[1]);
+ if(v == 32)
+ v += clz(u.i[0]);
+ return v;
+}
+
+OVERLOADABLE ulong clz(ulong x) {
+ if (x == 0)
+ return 64;
+ union { uint i[2]; ulong x; } u;
+ u.x = x;
+ uint v = clz(u.i[1]);
+ if(v == 32)
+ v += clz(u.i[0]);
+ return v;
+}
+
+
+// sat
+#define SDEF(TYPE) \
+OVERLOADABLE TYPE ocl_sadd_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE ocl_ssub_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); } \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); }
+SDEF(char);
+SDEF(short);
+#undef SDEF
+OVERLOADABLE int ocl_sadd_sat(int x, int y);
+OVERLOADABLE int add_sat(int x, int y) { return ocl_sadd_sat(x, y); }
+OVERLOADABLE int ocl_ssub_sat(int x, int y);
+OVERLOADABLE int sub_sat(int x, int y) {
+ return (y == 0x80000000u) ? (x & 0x7FFFFFFF) : ocl_ssub_sat(x, y);
+}
+OVERLOADABLE long ocl_sadd_sat(long x, long y);
+OVERLOADABLE long add_sat(long x, long y) {
+ union {long l; uint i[2];} ux, uy;
+ ux.l = x;
+ uy.l = y;
+ if((ux.i[1] ^ uy.i[1]) & 0x80000000u)
+ return x + y;
+ return ocl_sadd_sat(x, y);
+}
+OVERLOADABLE long ocl_ssub_sat(long x, long y);
+OVERLOADABLE long sub_sat(long x, long y) {
+ union {long l; uint i[2];} ux, uy;
+ ux.l = x;
+ uy.l = y;
+ if((ux.i[1] ^ uy.i[1]) & 0x80000000u)
+ return ocl_ssub_sat(x, y);
+ return x - y;
+}
+#define UDEF(TYPE) \
+OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_uadd_sat(x, y); } \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_usub_sat(x, y); }
+UDEF(uchar);
+UDEF(ushort);
+UDEF(uint);
+UDEF(ulong);
+#undef UDEF
+
+
+OVERLOADABLE int __gen_ocl_mul_hi(int x, int y);
+OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y);
+OVERLOADABLE long __gen_ocl_mul_hi(long x, long y);
+OVERLOADABLE ulong __gen_ocl_mul_hi(ulong x, ulong y);
+OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; }
+OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; }
+OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; }
+OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return (x * y) >> 16; }
+OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); }
+OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); }
+OVERLOADABLE long mul_hi(long x, long y) {
+ return __gen_ocl_mul_hi(x, y);
+}
+OVERLOADABLE ulong mul_hi(ulong x, ulong y) {
+ return __gen_ocl_mul_hi(x, y);
+}
+
+#define DEF(type) OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; }
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(long)
+DEF(ulong)
+#undef DEF
+
+OVERLOADABLE int mul24(int a, int b) { return ((a << 8) >> 8) * ((b << 8) >> 8); }
+OVERLOADABLE uint mul24(uint a, uint b) { return (a & 0xFFFFFF) * (b & 0xFFFFFF); }
+
+OVERLOADABLE int mad24(int a, int b, int c) { return mul24(a, b) + c; }
+OVERLOADABLE uint mad24(uint a, uint b, uint c) { return mul24(a, b) + c; }
+
+OVERLOADABLE char mad_sat(char a, char b, char c) {
+ int x = (int)a * (int)b + (int)c;
+ if (x > 127)
+ x = 127;
+ if (x < -128)
+ x = -128;
+ return x;
+}
+
+OVERLOADABLE uchar mad_sat(uchar a, uchar b, uchar c) {
+ uint x = (uint)a * (uint)b + (uint)c;
+ if (x > 255)
+ x = 255;
+ return x;
+}
+
+OVERLOADABLE short mad_sat(short a, short b, short c) {
+ int x = (int)a * (int)b + (int)c;
+ if (x > 32767)
+ x = 32767;
+ if (x < -32768)
+ x = -32768;
+ return x;
+}
+
+OVERLOADABLE ushort mad_sat(ushort a, ushort b, ushort c) {
+ uint x = (uint)a * (uint)b + (uint)c;
+ if (x > 65535)
+ x = 65535;
+ return x;
+}
+
+OVERLOADABLE int mad_sat(int a, int b, int c) {
+ long x = (long)a * (long)b + (long)c;
+ if (x > 0x7FFFFFFF)
+ x = 0x7FFFFFFF;
+ else if (x < -0x7FFFFFFF-1)
+ x = -0x7FFFFFFF-1;
+ return (int)x;
+}
+
+OVERLOADABLE uint mad_sat(uint a, uint b, uint c) {
+ ulong x = (ulong)a * (ulong)b + (ulong)c;
+ if (x > 0xFFFFFFFFu)
+ x = 0xFFFFFFFFu;
+ return (uint)x;
+}
+
+OVERLOADABLE long __gen_ocl_mad_sat(long a, long b, long c);
+OVERLOADABLE ulong __gen_ocl_mad_sat(ulong a, ulong b, ulong c);
+
+OVERLOADABLE long mad_sat(long a, long b, long c) {
+ return __gen_ocl_mad_sat(a, b, c);
+}
+
+OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c) {
+ return __gen_ocl_mad_sat(a, b, c);
+}
+
+OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
+OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); }
+OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); }
+OVERLOADABLE short __rotate_left(short x, short y) { return __rotate_left((ushort)x, (ushort)y); }
+OVERLOADABLE uint __rotate_left(uint x, uint y) { return (x << y) | (x >> (32 - y)); }
+OVERLOADABLE int __rotate_left(int x, int y) { return __rotate_left((uint)x, (uint)y); }
+OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { return (x << y) | (x >> (64 - y)); }
+OVERLOADABLE long __rotate_left(long x, long y) { return __rotate_left((ulong)x, (ulong)y); }
+#define DEF(type, m) OVERLOADABLE type rotate(type x, type y) { return __rotate_left(x, (type)(y & m)); }
+DEF(char, 7)
+DEF(uchar, 7)
+DEF(short, 15)
+DEF(ushort, 15)
+DEF(int, 31)
+DEF(uint, 31)
+DEF(long, 63)
+DEF(ulong, 63)
+#undef DEF
+
+OVERLOADABLE short __gen_ocl_upsample(short hi, short lo);
+OVERLOADABLE int __gen_ocl_upsample(int hi, int lo);
+OVERLOADABLE long __gen_ocl_upsample(long hi, long lo);
+OVERLOADABLE short upsample(char hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+OVERLOADABLE ushort upsample(uchar hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+OVERLOADABLE int upsample(short hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+OVERLOADABLE uint upsample(ushort hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+OVERLOADABLE long upsample(int hi, uint lo) {
+ return __gen_ocl_upsample((long)hi, (long)lo);
+}
+OVERLOADABLE ulong upsample(uint hi, uint lo) {
+ return __gen_ocl_upsample((long)hi, (long)lo);
+}
+
+OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y);
+OVERLOADABLE uint __gen_ocl_rhadd(uint x, uint y);
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
+#define DEF(type) OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
+DEC
+#undef DEF
+#define DEF(type) OVERLOADABLE type rhadd(type x, type y) { return (x + y + 1) >> 1; }
+DEC
+#undef DEF
+#undef DEC
+OVERLOADABLE int hadd(int x, int y) {
+ return (x < 0 && y > 0) || (x > 0 && y < 0) ?
+ ((x + y) >> 1) :
+ __gen_ocl_hadd((uint)x, (uint)y);
+}
+OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
+OVERLOADABLE int rhadd(int x, int y) {
+ return (x < 0 && y > 0) || (x > 0 && y < 0) ?
+ ((x + y + 1) >> 1) :
+ __gen_ocl_rhadd((uint)x, (uint)y);
+ }
+OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
+OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y);
+OVERLOADABLE ulong __gen_ocl_rhadd(ulong x, ulong y);
+OVERLOADABLE long hadd(long x, long y) {
+ return (x < 0 && y > 0) || (x > 0 && y < 0) ?
+ ((x + y) >> 1) :
+ __gen_ocl_hadd((ulong)x, (ulong)y);
+}
+OVERLOADABLE ulong hadd(ulong x, ulong y) {
+ return __gen_ocl_hadd(x, y);
+}
+OVERLOADABLE long rhadd(long x, long y) {
+ return (x < 0 && y > 0) || (x > 0 && y < 0) ?
+ ((x + y + 1) >> 1) :
+ __gen_ocl_rhadd((ulong)x, (ulong)y);
+}
+OVERLOADABLE ulong rhadd(ulong x, ulong y) {
+ return __gen_ocl_rhadd(x, y);
+}
+
+int __gen_ocl_abs(int x);
+#define DEC(TYPE) OVERLOADABLE u##TYPE abs(TYPE x) { return (u##TYPE) __gen_ocl_abs(x); }
+DEC(int)
+DEC(short)
+DEC(char)
+#undef DEC
+OVERLOADABLE ulong abs(long x) { return x < 0 ? -x : x; }
+/* For unsigned types, do nothing. */
+#define DEC(TYPE) OVERLOADABLE TYPE abs(TYPE x) { return x; }
+DEC(uint)
+DEC(ushort)
+DEC(uchar)
+DEC(ulong)
+#undef DEC
+
+/* Char and short type abs diff */
+/* promote char and short to int and will be no module overflow */
+#define DEC(TYPE, UTYPE) OVERLOADABLE UTYPE abs_diff(TYPE x, TYPE y) \
+ { return (UTYPE) (abs((int)x - (int)y)); }
+DEC(char, uchar)
+DEC(uchar, uchar)
+DEC(short, ushort)
+DEC(ushort, ushort)
+#undef DEC
+
+OVERLOADABLE uint abs_diff (uint x, uint y) {
+ /* same signed will never overflow. */
+ return y > x ? (y -x) : (x - y);
+}
+
+OVERLOADABLE uint abs_diff (int x, int y) {
+ /* same signed will never module overflow. */
+ if ((x >= 0 && y >= 0) || (x <= 0 && y <= 0))
+ return abs(x - y);
+
+ return (abs(x) + abs(y));
+}
+
+OVERLOADABLE ulong abs_diff (long x, long y) {
+ if ((x >= 0 && y >= 0) || (x <= 0 && y <= 0))
+ return abs(x - y);
+ return abs(x) + abs(y);
+}
+OVERLOADABLE ulong abs_diff (ulong x, ulong y) {
+ return y > x ? (y - x) : (x - y);
+}
+
+
+#define DECL_MIN_MAX_CLAMP(TYPE) \
+OVERLOADABLE TYPE max(TYPE a, TYPE b) { \
+ return a > b ? a : b; \
+} \
+OVERLOADABLE TYPE min(TYPE a, TYPE b) { \
+ return a < b ? a : b; \
+} \
+OVERLOADABLE TYPE clamp(TYPE v, TYPE l, TYPE u) { \
+ return max(min(v, u), l); \
+}
+DECL_MIN_MAX_CLAMP(int)
+DECL_MIN_MAX_CLAMP(short)
+DECL_MIN_MAX_CLAMP(char)
+DECL_MIN_MAX_CLAMP(uint)
+DECL_MIN_MAX_CLAMP(unsigned short)
+DECL_MIN_MAX_CLAMP(unsigned char)
+DECL_MIN_MAX_CLAMP(long)
+DECL_MIN_MAX_CLAMP(ulong)
+#undef DECL_MIN_MAX_CLAMP
--- /dev/null
+#ifndef __OCL_INTEGER_H__
+#define __OCL_INTEGER_H__
+
+#include "ocl_types.h"
+
+#define CHAR_BIT 8
+#define CHAR_MAX SCHAR_MAX
+#define CHAR_MIN SCHAR_MIN
+#define INT_MAX 2147483647
+#define INT_MIN (-2147483647 - 1)
+#define LONG_MAX 0x7fffffffffffffffL
+#define LONG_MIN (-0x7fffffffffffffffL - 1)
+#define SCHAR_MAX 127
+#define SCHAR_MIN (-127 - 1)
+#define SHRT_MAX 32767
+#define SHRT_MIN (-32767 - 1)
+#define UCHAR_MAX 255
+#define USHRT_MAX 65535
+#define UINT_MAX 0xffffffff
+#define ULONG_MAX 0xffffffffffffffffUL
+
+OVERLOADABLE char clz(char x);
+OVERLOADABLE uchar clz(uchar x);
+OVERLOADABLE short clz(short x);
+OVERLOADABLE ushort clz(ushort x);
+OVERLOADABLE int clz(int x);
+OVERLOADABLE uint clz(uint x);
+OVERLOADABLE long clz(long x);
+OVERLOADABLE ulong clz(ulong x);
+
+OVERLOADABLE char mul_hi(char x, char y);
+OVERLOADABLE uchar mul_hi(uchar x, uchar y);
+OVERLOADABLE short mul_hi(short x, short y);
+OVERLOADABLE ushort mul_hi(ushort x, ushort y);
+OVERLOADABLE int mul_hi(int x, int y);
+OVERLOADABLE uint mul_hi(uint x, uint y);
+OVERLOADABLE long mul_hi(long x, long y);
+OVERLOADABLE ulong mul_hi(ulong x, ulong y);
+
+#define SDEF(TYPE) \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y);
+SDEF(char);
+SDEF(short);
+SDEF(int);
+SDEF(long);
+#undef SDEF
+#define UDEF(TYPE) \
+OVERLOADABLE TYPE add_sat(TYPE x, TYPE y); \
+OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y);
+UDEF(uchar);
+UDEF(ushort);
+UDEF(uint);
+UDEF(ulong);
+#undef UDEF
+
+#define DEF(type) OVERLOADABLE type mad_hi(type a, type b, type c);
+DEF(char)
+DEF(uchar)
+DEF(short)
+DEF(ushort)
+DEF(int)
+DEF(uint)
+DEF(long)
+DEF(ulong)
+#undef DEF
+
+OVERLOADABLE int mul24(int a, int b);
+OVERLOADABLE uint mul24(uint a, uint b);
+
+OVERLOADABLE int mad24(int a, int b, int c);
+OVERLOADABLE uint mad24(uint a, uint b, uint c);
+
+OVERLOADABLE char mad_sat(char a, char b, char c) ;
+OVERLOADABLE uchar mad_sat(uchar a, uchar b, uchar c);
+OVERLOADABLE short mad_sat(short a, short b, short c);
+OVERLOADABLE ushort mad_sat(ushort a, ushort b, ushort c);
+OVERLOADABLE int mad_sat(int a, int b, int c);
+OVERLOADABLE uint mad_sat(uint a, uint b, uint c);
+OVERLOADABLE long mad_sat(long a, long b, long c);
+OVERLOADABLE ulong mad_sat(ulong a, ulong b, ulong c);
+
+#define DEF(type, m) OVERLOADABLE type rotate(type x, type y);
+DEF(char, 7)
+DEF(uchar, 7)
+DEF(short, 15)
+DEF(ushort, 15)
+DEF(int, 31)
+DEF(uint, 31)
+DEF(long, 63)
+DEF(ulong, 63)
+#undef DEF
+
+OVERLOADABLE short upsample(char hi, uchar lo);
+OVERLOADABLE ushort upsample(uchar hi, uchar lo);
+OVERLOADABLE int upsample(short hi, ushort lo);
+OVERLOADABLE uint upsample(ushort hi, ushort lo);
+OVERLOADABLE long upsample(int hi, uint lo);
+OVERLOADABLE ulong upsample(uint hi, uint lo);
+
+#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
+#define DEF(type) OVERLOADABLE type hadd(type x, type y);
+DEC
+#undef DEF
+#define DEF(type) OVERLOADABLE type rhadd(type x, type y);
+DEC
+#undef DEF
+#undef DEC
+OVERLOADABLE int hadd(int x, int y);
+OVERLOADABLE uint hadd(uint x, uint y);
+OVERLOADABLE int rhadd(int x, int y);
+OVERLOADABLE uint rhadd(uint x, uint y);
+OVERLOADABLE long hadd(long x, long y);
+OVERLOADABLE ulong hadd(ulong x, ulong y);
+OVERLOADABLE long rhadd(long x, long y);
+OVERLOADABLE ulong rhadd(ulong x, ulong y);
+
+#define DEC(TYPE) OVERLOADABLE u##TYPE abs(TYPE x);
+DEC(int)
+DEC(short)
+DEC(char)
+#undef DEC
+OVERLOADABLE ulong abs(long x);
+/* For unsigned types, do nothing. */
+#define DEC(TYPE) OVERLOADABLE TYPE abs(TYPE x);
+DEC(uint)
+DEC(ushort)
+DEC(uchar)
+DEC(ulong)
+#undef DEC
+
+/* Char and short type abs diff */
+/* promote char and short to int and will be no module overflow */
+#define DEC(TYPE, UTYPE) OVERLOADABLE UTYPE abs_diff(TYPE x, TYPE y);
+DEC(char, uchar)
+DEC(uchar, uchar)
+DEC(short, ushort)
+DEC(ushort, ushort)
+#undef DEC
+
+OVERLOADABLE uint abs_diff (uint x, uint y);
+OVERLOADABLE uint abs_diff (int x, int y);
+OVERLOADABLE ulong abs_diff (long x, long y);
+OVERLOADABLE ulong abs_diff (ulong x, ulong y);
+
+
+#define DECL_MIN_MAX_CLAMP(TYPE) \
+OVERLOADABLE TYPE max(TYPE a, TYPE b); \
+OVERLOADABLE TYPE min(TYPE a, TYPE b); \
+OVERLOADABLE TYPE clamp(TYPE v, TYPE l, TYPE u);
+DECL_MIN_MAX_CLAMP(int)
+DECL_MIN_MAX_CLAMP(short)
+DECL_MIN_MAX_CLAMP(char)
+DECL_MIN_MAX_CLAMP(uint)
+DECL_MIN_MAX_CLAMP(unsigned short)
+DECL_MIN_MAX_CLAMP(unsigned char)
+DECL_MIN_MAX_CLAMP(long)
+DECL_MIN_MAX_CLAMP(ulong)
+#undef DECL_MIN_MAX_CLAMP