//M*/
// Double-precision setup, active only when DOUBLE_SUPPORT is defined
// (presumably passed in by the host-side build options -- confirm).
// The extension to enable is chosen by feature macro: cl_khr_fp64 when the
// compiler defines it, otherwise cl_amd_fp64 when that one is defined.
// If neither macro is defined, no extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
if (x < cols && y < rows)
{
x = x << 2;
-
+
+#ifdef dst_align
+#undef dst_align
+#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_absdiff_C1_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- double4 src2, int rows, int cols, int dst_step1)
+ __global double *dst, int dst_step, int dst_offset,
+ double4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
__kernel void arithm_s_absdiff_C2_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
__kernel void arithm_s_absdiff_C2_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *dst, int dst_step, int dst_offset,
- float4 src2, int rows, int cols, int dst_step1)
+ float4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int src1_offset,
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_absdiff_C3_D4 (__global int *src1, int src1_step, int src1_offset,
int tmp_data_1 = convert_int_sat(abs_diff(src1_data_1, src2_data_1));
int tmp_data_2 = convert_int_sat(abs_diff(src1_data_2, src2_data_2));
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_absdiff_C3_D5 (__global float *src1, int src1_step, int src1_offset,
float tmp_data_1 = fabs(src1_data_1 - src2_data_1);
float tmp_data_2 = fabs(src1_data_2 - src2_data_2);
- *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
double tmp_data_1 = fabs(src1_data_1 - src2_data_1);
double tmp_data_2 = fabs(src1_data_2 - src2_data_2);
- *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
//M*/
// Double-precision setup, active only when DOUBLE_SUPPORT is defined
// (presumably passed in by the host-side build options -- confirm).
// The extension to enable is chosen by feature macro: cl_khr_fp64 when the
// compiler defines it, otherwise cl_amd_fp64 when that one is defined.
// If neither macro is defined, no extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
{
x = x << 2;
- #define dst_align (dst_offset & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
{
x = x << 2;
- #define dst_align ((dst_offset >> 1) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
{
x = x << 2;
- #define dst_align ((dst_offset >> 1) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
{
x = x << 2;
- #define dst_align (dst_offset & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
{
x = x << 1;
- #define dst_align ((dst_offset >> 1) & 1)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
{
x = x << 1;
- #define dst_align ((dst_offset >> 1) & 1)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
{
x = x << 1;
- #define dst_align ((dst_offset >> 1) & 1)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
{
x = x << 2;
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
{
x = x << 1;
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
{
x = x << 1;
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
}
}
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
-#if defined DOUBLE_SUPPORT
+#if defined (DOUBLE_SUPPORT)
// Pick the fp64 extension by feature macro: cl_khr_fp64 when the compiler
// defines it, otherwise cl_amd_fp64 when that one is defined. If neither
// macro is defined, no extension pragma is emitted.
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
typedef double F;
#else
typedef float F;
/////////////////////////////////////////////addWeighted//////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset,
- __global uchar *src2, int src2_step,int src2_offset,
- F alpha,F beta,F gama,
- __global uchar *dst, int dst_step,int dst_offset,
- int rows, int cols,int dst_step1)
+ __global uchar *src2, int src2_step,int src2_offset,
+ F alpha,F beta,F gama,
+ __global uchar *dst, int dst_step,int dst_offset,
+ int rows, int cols,int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
{
x = x << 2;
- #define dst_align (dst_offset & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
// short4 tmp = convert_short4_sat(src1_data) * alpha + convert_short4_sat(src2_data) * beta + gama;
- short4 tmp;
+ short4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
*((__global uchar4 *)(dst + dst_index)) = dst_data;
- // dst[x + y * dst_step] = src1[x + y * src1_step] * alpha + src2[x + y * src2_step] * beta + gama;
+ // dst[x + y * dst_step] = src1[x + y * src1_step] * alpha + src2[x + y * src2_step] * beta + gama;
}
}
__kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offset,
- __global ushort *src2, int src2_step,int src2_offset,
- F alpha,F beta,F gama,
- __global ushort *dst, int dst_step,int dst_offset,
- int rows, int cols,int dst_step1)
+ __global ushort *src2, int src2_step,int src2_offset,
+ F alpha,F beta,F gama,
+ __global ushort *dst, int dst_step,int dst_offset,
+ int rows, int cols,int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) & (int)0xfffffff8);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
- if(src1_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ ushort4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ ushort4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
- int4 tmp;
+ // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
+ int4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) - (dst_align << 1 ));
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
- if(src1_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ short4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ short4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
- int4 tmp;
+ // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama;
+ int4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
__kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
__global int *src2, int src2_step,int src2_offset,
- F alpha,F beta, F gama,
+ F alpha,F beta, F gama,
__global int *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
x = x << 2;
- #define bitOfInt (sizeof(int)== 4 ? 2: 3)
-
- #define dst_align ((dst_offset >> bitOfInt) & 3)
+#define bitOfInt (sizeof(int)== 4 ? 2: 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> bitOfInt) & 3)
int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << bitOfInt) -(dst_align << bitOfInt));
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index_fix));
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index_fix));
- if(src1_index < 0)
- {
- int4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- int4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ int4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ int4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
- // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
- float4 tmp;
+ // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
+ float4 tmp;
tmp.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp.z = src1_data.z * alpha + src2_data.z * beta + gama;
__kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset,
__global float *src2, int src2_step,int src2_offset,
- F alpha,F beta, F gama,
+ F alpha,F beta, F gama,
__global float *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 2) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
- if(src1_index < 0)
- {
- float4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- float4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
- // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
-
- // float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ;
- float4 tmp_data;
+ if(src1_index < 0)
+ {
+ float4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ float4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
+ // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ;
+
+ // float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ;
+ float4 tmp_data;
tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp_data.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp_data.z = src1_data.z * alpha + src2_data.z * beta + gama;
tmp_data.w = src1_data.w * alpha + src2_data.w * beta + gama;
- // float4 tmp_data = convert_float4(tmp);
+ // float4 tmp_data = convert_float4(tmp);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.y : dst_data.y;
#if defined (DOUBLE_SUPPORT)
__kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offset,
__global double *src2, int src2_step,int src2_offset,
- F alpha,F beta, F gama,
+ F alpha,F beta, F gama,
__global double *dst, int dst_step,int dst_offset,
int rows, int cols,int dst_step1)
{
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 3) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 3) -(dst_align << 3));
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix));
double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix));
double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
- if(src1_index < 0)
- {
- double4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- double4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
- // double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ;
- double4 tmp_data;
+ if(src1_index < 0)
+ {
+ double4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ double4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
+ // double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ;
+ double4 tmp_data;
tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama;
tmp_data.y = src1_data.y * alpha + src2_data.y * beta + gama;
tmp_data.z = src1_data.z * alpha + src2_data.z * beta + gama;
//M*/
// Double-precision setup, active only when DOUBLE_SUPPORT is defined
// (presumably passed in by the host-side build options -- confirm).
// The extension to enable is chosen by feature macro: cl_khr_fp64 when the
// compiler defines it, otherwise cl_amd_fp64 when that one is defined.
// If neither macro is defined, no extension pragma is emitted.
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
+#endif
/**************************************add with scalar without mask**************************************/
__kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src1_offset,
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_C3_D4 (__global int *src1, int src1_step, int src1_offset,
int tmp_data_1 = convert_int_sat((long)src1_data_1 + (long)src2_data_1);
int tmp_data_2 = convert_int_sat((long)src1_data_2 + (long)src2_data_2);
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
__kernel void arithm_s_add_C3_D5 (__global float *src1, int src1_step, int src1_offset,
float tmp_data_1 = src1_data_1 + src2_data_1;
float tmp_data_2 = src1_data_2 + src2_data_2;
- *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
double tmp_data_1 = src1_data_1 + src2_data_1;
double tmp_data_2 = src1_data_2 + src2_data_2;
- *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64 // use the cl_khr_fp64 extension when the compiler defines it
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64) // otherwise fall back to cl_amd_fp64 if that one is defined
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif // cl_khr_fp64 / cl_amd_fp64 -- neither defined: no fp64 pragma is emitted
#endif
/**************************************add with scalar with mask**************************************/
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
}
}
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64 // use the cl_khr_fp64 extension when the compiler defines it
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64) // otherwise fall back to cl_amd_fp64 if that one is defined
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif // cl_khr_fp64 / cl_amd_fp64 -- neither defined: no fp64 pragma is emitted
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and without mask**************************************/
__kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- uchar4 src1_data = vload4(0, src1 + src1_index_fix);
- uchar4 src2_data = vload4(0, src2 + src2_index_fix);
-
- if(src1_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ uchar4 src1_data = vload4(0, src1 + src1_index_fix);
+ uchar4 src2_data = vload4(0, src2 + src2_index_fix);
+
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = src1_data & src2_data;
__kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- char4 src1_data = vload4(0, src1 + src1_index_fix);
- char4 src2_data = vload4(0, src2 + src2_index_fix);
-
- if(src1_index < 0)
- {
- char4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- char4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ char4 src1_data = vload4(0, src1 + src1_index_fix);
+ char4 src2_data = vload4(0, src2 + src2_index_fix);
+
+ if(src1_index < 0)
+ {
+ char4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ char4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
char4 dst_data = *((__global char4 *)(dst + dst_index));
char4 tmp_data = src1_data & src2_data;
__kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
- if(src1_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ ushort4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ ushort4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data = src1_data & src2_data;
__kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
- if(src1_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ short4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ short4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 tmp_data = src1_data & src2_data;
__kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
}
__kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64 // use the cl_khr_fp64 extension when the compiler defines it
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64) // otherwise fall back to cl_amd_fp64 if that one is defined
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif // cl_khr_fp64 / cl_amd_fp64 -- neither defined: no fp64 pragma is emitted
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and with mask**************************************/
-__kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
uchar2 mask_data = vload2(0, mask + mask_index);
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data & src2_data;
+ short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
+ short2 tmp_data = src1_data & src2_data;
data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-__kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
-__kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C1_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-#endif
-__kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global char8 *)((__global char *)dst + dst_index)) = data;
}
}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+
+__kernel void arithm_bitwise_and_with_mask_C2_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global char16 *)((__global char *)dst + dst_index)) = data;
}
}
-#endif
-__kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
// Other kernels in this file define dst_align with different expressions, so
// drop any earlier definition before redefining it for this kernel.
// Here it is the in-row offset (dst_offset % dst_step) divided by 3, masked to
// its low two bits; the src1/src2 index computations below subtract
// dst_align * 3 and the mask index subtracts dst_align.
// NOTE(review): the divisor 3 presumably is the size of one pixel — confirm.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
// dst_align is redefined per kernel with differing expressions; undefine any
// previous definition first so the #define below is not a conflicting
// redefinition.
// Value: ((dst_offset % dst_step) / 3) masked to two bits. The index
// computations that follow subtract dst_align * 3 (src1, src2) and dst_align
// (mask).
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
// Clear any dst_align left over from a preceding kernel (they use different
// expressions) before defining this kernel's version.
// Value: ((dst_offset % dst_step) / 6) masked to its lowest bit. The src1/src2
// indices below subtract dst_align * 6 and the mask index subtracts dst_align.
// NOTE(review): the divisor 6 presumably is the size of one pixel — confirm.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
// dst_align is defined differently by other kernels in this file; undefine it
// first so this definition does not conflict with an earlier one.
// Value: lowest bit of ((dst_offset % dst_step) / 6). It is subtracted
// (scaled by 6 for src1/src2, unscaled for mask) in the index math below.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C3_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
-
-__kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-__kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_and_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_and_with_mask_C4_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
// the use of this software, even if advised of the possibility of such damage.
//
//
-#if defined (__ATI__)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
// Only when DOUBLE_SUPPORT is defined: enable a double-precision extension.
// cl_khr_fp64 is chosen if that macro is defined; otherwise cl_amd_fp64 is
// enabled if its macro is defined. If neither macro is defined, no pragma is
// emitted.
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************and with scalar without mask**************************************/
-__kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
// Other kernels in this file define dst_align with different expressions, so
// remove any existing definition before redefining it here.
// Value: the low two bits of dst_offset. The src1 index below subtracts it
// from x + src1_offset (x has just been multiplied by 4 via x << 2).
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
// Undefine any dst_align inherited from an earlier kernel; each kernel in this
// file supplies its own expression for it.
// Value: dst_offset & 3 (its low two bits), subtracted in the src1 index
// computation that follows.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
// dst_align is a per-kernel macro with varying definitions across this file;
// clear any previous one before defining it again.
// Value: bit 1 of dst_offset, i.e. (dst_offset >> 1) & 1. The src1 index below
// subtracts (dst_align << 1) from (x << 1) + src1_offset.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
// Drop any earlier dst_align (other kernels define it differently) so the
// definition below stands alone.
// Value: (dst_offset >> 1) & 1; it is shifted left by one and subtracted in
// the src1 index computation that follows.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
-
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C1_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
// Each kernel redefines dst_align, and the expressions differ between kernels;
// undefine first to avoid a conflicting macro redefinition.
// Value: (dst_offset >> 1) & 1. The src1 index below uses it as
// (x << 1) + src1_offset - (dst_align << 1).
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
// Remove any dst_align still defined from a previous kernel before giving it
// this kernel's meaning.
// Value: bit 1 of dst_offset; subtracted (after << 1) in the src1 index below.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
char8 tmp_data = src1_data & src2_data;
*((__global char8 *)((__global char *)dst + dst_index)) = tmp_data;
- }
+ }
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C2_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
// Other kernels in this file define dst_align with different expressions, so
// clear any earlier definition before redefining it.
// Value: ((dst_offset % dst_step) / 3) masked to two bits. The src1 index
// below is (x * 3) + src1_offset - (dst_align * 3).
// NOTE(review): the divisor 3 presumably is the size of one pixel — confirm.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
// dst_align is redefined kernel by kernel; undefine any prior definition so
// this #define is not a conflicting redefinition.
// Value: low two bits of ((dst_offset % dst_step) / 3); multiplied by 3 and
// subtracted in the src1 index computation that follows.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
// Clear any dst_align left defined by a preceding kernel; the expression
// differs between kernels in this file.
// Value: lowest bit of ((dst_offset % dst_step) / 6). The src1 index below is
// (x * 6) + src1_offset - (dst_align * 6).
// NOTE(review): the divisor 6 presumably is the size of one pixel — confirm.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
// Undefine dst_align if an earlier kernel already defined it, then define this
// kernel's version.
// Value: ((dst_offset % dst_step) / 6) & 1; scaled by 6 and subtracted in the
// src1 index computation that follows.
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int tmp_data_1 = src1_data_1 & src2_data_1;
int tmp_data_2 = src1_data_2 & src2_data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
-__kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
char4 tmp_data_1 = src1_data_1 & src2_data_1;
char4 tmp_data_2 = src1_data_2 & src2_data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C3_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
short4 tmp_data_1 = src1_data_1 & src2_data_1;
short4 tmp_data_2 = src1_data_2 & src2_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
-__kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-__kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_C4_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
short4 tmp_data_2 = src1_data_2 & src2_data_2;
short4 tmp_data_3 = src1_data_3 & src2_data_3;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
}
}
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
-#if defined (__ATI__)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and with scalar with mask**************************************/
-__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
-__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
+
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = ~ src1_data;
- /* if(src1_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- */
+ /* if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ */
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
__kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
__kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
__kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
__kernel void arithm_bitwise_not_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or without mask**************************************/
__kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
- if(src1_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = src1_data | src2_data;
__kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
__kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_bitwise_or_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
}
__kernel void arithm_bitwise_or_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or with mask**************************************/
-__kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
uchar2 mask_data = vload2(0, mask + mask_index);
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data | src2_data;
+ short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
+ short2 tmp_data = src1_data | src2_data;
data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-__kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
-__kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C1_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
#endif
-
-__kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C2_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
#endif
-
-__kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C3_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
-
-__kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-__kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_or_with_mask_C4_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or with scalar without mask**************************************/
-__kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C1_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
}
}
#endif
-
-__kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
}
}
__kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C2_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
}
}
#endif
-__kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int tmp_data_1 = src1_data_1 | src2_data_1;
int tmp_data_2 = src1_data_2 | src2_data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
-__kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
char4 tmp_data_1 = src1_data_1 | src2_data_1;
char4 tmp_data_2 = src1_data_2 | src2_data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C3_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
short4 tmp_data_1 = src1_data_1 | src2_data_1;
short4 tmp_data_2 = src1_data_2 | src2_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
-__kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
}
-__kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
}
}
-__kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_C4_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
short4 tmp_data_2 = src1_data_2 | src2_data_2;
short4 tmp_data_3 = src1_data_3 | src2_data_3;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
}
}
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or with scalar with mask**************************************/
-__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
-
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
}
}
#endif
-__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
char8 src_data2 = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
- char8 data = src_data1 | src_data2;
+ char8 data = src_data1 | src_data2;
data = mask_data ? data : dst_data;
*((__global char8 *)((__global char *)dst + dst_index)) = data;
- }
+ }
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
}
}
#endif
-__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
- }
+ }
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
-__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor without mask**************************************/
__kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
uchar4 src1_data = vload4(0, src1 + src1_index_fix);
uchar4 src2_data = vload4(0, src2 + src2_index_fix);
- if(src1_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = src1_data ^ src2_data;
__kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
char4 src1_data = vload4(0, src1 + src1_index_fix);
char4 src2_data = vload4(0, src2 + src2_index_fix);
- if(src1_index < 0)
- {
- char4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- char4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ char4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ char4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
char4 dst_data = *((__global char4 *)(dst + dst_index));
char4 tmp_data = src1_data ^ src2_data;
__kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
- if(src1_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ ushort4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ ushort4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data = src1_data ^ src2_data;
__kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- if(src1_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
+ if(src1_index < 0)
+ {
+ short4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ short4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
__kernel void arithm_bitwise_xor_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
}
__kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
*((__global char4 *)((__global char *)dst + dst_index)) = tmp;
}
}
-
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global char *src2, int src2_step, int src2_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor with mask**************************************/
-__kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-__kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
uchar2 mask_data = vload2(0, mask + mask_index);
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data ^ src2_data;
+ short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
+ short2 tmp_data = src1_data ^ src2_data;
data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-__kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
-__kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C1_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
-
-__kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C2_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
#endif
-
-__kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C3_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
-
-__kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-__kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+__kernel void arithm_bitwise_xor_with_mask_C4_D6 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *src2, int src2_step, int src2_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
// the use of this software, even if advised of the possibility of such damage.
//
//
-#if defined (__ATI__)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************xor with scalar without mask**************************************/
-__kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C1_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
char8 tmp_data = src1_data ^ src2_data;
*((__global char8 *)((__global char *)dst + dst_index)) = tmp_data;
- }
+ }
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C2_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
}
-__kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
}
}
-__kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int tmp_data_1 = src1_data_1 ^ src2_data_1;
int tmp_data_2 = src1_data_2 ^ src2_data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
-__kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C3_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
short4 tmp_data_1 = src1_data_1 ^ src2_data_1;
short4 tmp_data_2 = src1_data_2 ^ src2_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
}
}
#endif
-__kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-__kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_C4_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
short4 tmp_data_2 = src1_data_2 ^ src2_data_2;
short4 tmp_data_3 = src1_data_3 ^ src2_data_3;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
}
}
-#endif
+#endif
\ No newline at end of file
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
-#if defined (__ATI__)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
+
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor with scalar with mask**************************************/
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#endif
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
+ ? tmp_data_1.x : data_1.x;
data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
+ ? tmp_data_1.y : data_1.y;
data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
+ ? tmp_data_2.xy : data_2.xy;
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
+ *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
+ *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
+ *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
data_1 = mask_data ? tmp_data_1 : data_1;
data_2 = mask_data ? tmp_data_2 : data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
+ *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
+ *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
+ *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
}
}
#endif
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (
+ __global uchar *src1, int src1_step, int src1_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ uchar4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (
+ __global ushort *src1, int src1_step, int src1_offset,
+ __global ushort *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ ushort4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (
+ __global int *src1, int src1_step, int src1_offset,
+ __global int *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ int4 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (
+ __global char *src1, int src1_step, int src1_offset,
+ __global char *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ char16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
+__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (
+ __global short *src1, int src1_step, int src1_offset,
+ __global short *dst, int dst_step, int dst_offset,
+ __global uchar *mask, int mask_step, int mask_offset,
+ short16 src2, int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
__kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1)& 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
+ int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
if(src1_index < 0)
{
}
__kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global float *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
- float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src2_index < 0)
+ float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
+ if(src2_index < 0)
{
float4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global double *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 3) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
/***********************************Compare GT**************************/
__kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
}
__kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
}
__kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
+ int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index));
int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index));
if(src1_index < 0)
{
}
__kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global float *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global double *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 3) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
/***********************************Compare GE**************************/
__kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
__kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1)& 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
}
__kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 2)& 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
+ uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
}
__kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global float *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 2)& 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global double *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 3)& 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 3)& 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
double4 tmp;
tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- } uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
+ }
+ uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4((src1_data >= src2_data));
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
}
}
#endif
+
//
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
/***********************************Compare NE*******************************/
__kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
__kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1)& 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1)& 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1)& 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
}
__kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2)& 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
}
__kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global float *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
int src1_index_fix = src1_index < 0 ? 0 : src1_index;
int src2_index_fix = src2_index < 0 ? 0 : src2_index;
float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix));
- float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src1_index < 0)
+ float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix));
+ if(src1_index < 0)
{
float4 tmp;
tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
+ uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4((src1_data != src2_data));
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global double *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 3) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
/***********************************Compare LT*******************************/
__kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
__kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
}
__kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
}
__kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global float *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2) & 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
}
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
+ uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global double *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 3) & 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 3) & 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
}
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
+ uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4((src1_data < src2_data));
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
/***********************************Compare LE*******************************/
__kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global uchar *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
__kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global ushort *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
__kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global short *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
}
__kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global int *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2)& 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
}
__kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global float *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 2)& 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 2)& 3)
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
+ __global double *src2, int src2_step, int src2_offset,
+ __global uchar *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
x = x << 2;
- #define dst_align ((dst_offset >> 3)& 3)
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 3)& 3)
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
}
}
#endif
+
+
//M*/
// Precision selection for the kernels that follow.
// When DOUBLE_SUPPORT is defined, the fp64 extension is enabled: cl_khr_fp64 if
// that macro is defined, otherwise cl_amd_fp64 if that one is defined.
// F / F4 are the scalar and 4-wide working types and convert_F4 is the matching
// vector conversion (used below as `F4 tmp = convert_F4(src1_data) * scalar`).
// NOTE(review): `convert_F` is defined as `float` directly after the double
// typedefs here; presumably an `#else` branch carrying the float definitions was
// elided from this hunk -- confirm against the full file.
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
typedef double F ;
typedef double4 F4;
#define convert_F4 convert_double4
#define convert_F float
#endif
// round2_uchar: applies round() to v and converts the result to uchar with
// saturation (convert_uchar_sat).
// The removed version stored that value in a temporary (v1) and returned it; the
// added version returns the same expression directly and marks the helper inline.
-uchar round2_uchar(F v){
-
- uchar v1 = convert_uchar_sat(round(v));
- //uchar v2 = convert_uchar_sat(v+(v>=0 ? 0.5 : -0.5));
-
- return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
+inline uchar round2_uchar(F v)
+{
+ return convert_uchar_sat(round(v));
}
// round2_ushort: applies round() to v and converts the result to ushort with
// saturation (convert_ushort_sat).
// The removed version stored that value in a temporary (v1) and returned it; the
// added version returns the same expression directly and marks the helper inline.
-ushort round2_ushort(F v){
-
- ushort v1 = convert_ushort_sat(round(v));
- //ushort v2 = convert_ushort_sat(v+(v>=0 ? 0.5 : -0.5));
-
- return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
+inline ushort round2_ushort(F v)
+{
+ return convert_ushort_sat(round(v));
}
// round2_short: applies round() to v and converts the result to short with
// saturation (convert_short_sat).
// The removed version stored that value in a temporary (v1) and returned it; the
// added version returns the same expression directly and marks the helper inline.
-short round2_short(F v){
-
- short v1 = convert_short_sat(round(v));
- //short v2 = convert_short_sat(v+(v>=0 ? 0.5 : -0.5));
- return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
+inline short round2_short(F v)
+{
+ return convert_short_sat(round(v));
}
// round2_int: applies round() to v and converts the result to int with
// saturation (convert_int_sat).
// The removed version stored that value in a temporary (v1) and returned it; the
// added version returns the same expression directly and marks the helper inline.
-int round2_int(F v){
-
- int v1 = convert_int_sat(round(v));
- //int v2 = convert_int_sat(v+(v>=0 ? 0.5 : -0.5));
- return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
+inline int round2_int(F v)
+{
+ return convert_int_sat(round(v));
}
///////////////////////////////////////////////////////////////////////////////////////
////////////////////////////divide///////////////////////////////////////////////////
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1, F scalar)
{
- int x = get_global_id(0);
- int y = get_global_id(1);
+ int2 coor = (int2)(get_global_id(0), get_global_id(1));
- if (x < cols && y < rows)
+ if (coor.x < cols && coor.y < rows)
{
- x = x << 2;
+ coor.x = coor.x << 2;
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
+ int2 src_index = (int2)(mad24(coor.y, src1_step, coor.x + src1_offset - dst_align),
+ mad24(coor.y, src2_step, coor.x + src2_offset - dst_align));
- #define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
+ int4 dst_args = (int4)(mad24(coor.y, dst_step, dst_offset),
+ mad24(coor.y, dst_step, dst_offset + dst_step1),
+ mad24(coor.y, dst_step, dst_offset + coor.x & (int)0xfffffffc),
+ 0);
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
+ uchar4 src1_data = vload4(0, src1 + src_index.x);
+ uchar4 src2_data = vload4(0, src2 + src_index.y);
+ uchar4 dst_data = *((__global uchar4 *)(dst + dst_args.z));
F4 tmp = convert_F4(src1_data) * scalar;
-
uchar4 tmp_data;
- tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (F)src2_data.x);
- tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (F)src2_data.y);
- tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (F)src2_data.z);
- tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (F)src2_data.w);
+ tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / src2_data.x);
+ tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / src2_data.y);
+ tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / src2_data.z);
+ tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / src2_data.w);
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
+ dst_data.x = ((dst_args.z + 0 >= dst_args.x) && (dst_args.z + 0 < dst_args.y)) ? tmp_data.x : dst_data.x;
+ dst_data.y = ((dst_args.z + 1 >= dst_args.x) && (dst_args.z + 1 < dst_args.y)) ? tmp_data.y : dst_data.y;
+ dst_data.z = ((dst_args.z + 2 >= dst_args.x) && (dst_args.z + 2 < dst_args.y)) ? tmp_data.z : dst_data.z;
+ dst_data.w = ((dst_args.z + 3 >= dst_args.x) && (dst_args.z + 3 < dst_args.y)) ? tmp_data.w : dst_data.w;
- *((__global uchar4 *)(dst + dst_index)) = dst_data;
+ *((__global uchar4 *)(dst + dst_args.z)) = dst_data;
}
}
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src_index = mad24(y, src_step, x + src_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
}
}
#endif
+
+
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
if (x < cols && y < thread_rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
if (x < cols && y < thread_rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src_index_0 = mad24(y, src_step, x + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align);
if (x < cols && y < thread_rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset >> 1) & 3) << 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
if (x < cols && y < thread_rows)
{
x = x << 2;
-
- #define dst_align (((dst_offset >> 1) & 3) << 1)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (((dst_offset >> 1) & 3) << 1)
int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align);
int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align);
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
-// Dachuan Zhao, dachuan@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//M*/
-#if defined DOUBLE_SUPPORT
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
// round_int4: offsets each lane of v by +0.5 when the lane is greater than zero
// and by -0.5 otherwise (zero included), then converts the vector to int4 with
// saturation (convert_int4_sat).
// The only change in this hunk is moving the opening brace onto its own line.
// NOTE(review): only lanes s0..s2 are adjusted in the lines visible here; the s3
// adjustment is presumably among lines elided from this hunk -- confirm against
// the full file.
-int4 round_int4(float4 v){
+int4 round_int4(float4 v)
+{
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
return convert_int4_sat(v);
}
// round_uint4: offsets each lane of v by +0.5 when the lane is greater than zero
// and by -0.5 otherwise (zero included), then converts the vector to uint4 with
// saturation (convert_uint4_sat).
// The only change in this hunk is moving the opening brace onto its own line.
// NOTE(review): only lanes s0..s2 are adjusted in the lines visible here; the s3
// adjustment is presumably among lines elided from this hunk -- confirm against
// the full file.
-uint4 round_uint4(float4 v){
+uint4 round_uint4(float4 v)
+{
v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5);
v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5);
v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5);
return convert_uint4_sat(v);
}
-long round_int(float v){
+long round_int(float v)
+{
v = v + (v > 0 ? 0.5 : -0.5);
return convert_int_sat(v);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align (dst_offset & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
if (x < cols && y < rows)
{
x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
+
+#ifdef dst_align
+#undef dst_align
+#endif
+#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
#endif
__kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1, float scalar)
+ __global float *dst, int dst_step, int dst_offset,
+ int rows, int cols, int dst_step1, float scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);