From 073096357657ceb251303dd5162f4a7eecf48846 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 24 Sep 2013 13:58:18 +0400 Subject: [PATCH] refactored and extended ocl::compare --- modules/ocl/src/arithm.cpp | 86 +-- modules/ocl/src/opencl/arithm_compare.cl | 74 ++ modules/ocl/src/opencl/arithm_compare_eq.cl | 1016 --------------------------- modules/ocl/src/opencl/arithm_compare_ne.cl | 1013 -------------------------- 4 files changed, 106 insertions(+), 2083 deletions(-) create mode 100644 modules/ocl/src/opencl/arithm_compare.cl delete mode 100644 modules/ocl/src/opencl/arithm_compare_eq.cl delete mode 100644 modules/ocl/src/opencl/arithm_compare_ne.cl diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 4f6737e..1effac2 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -82,8 +82,7 @@ namespace cv extern const char *arithm_bitwise_binary_scalar; extern const char *arithm_bitwise_binary_scalar_mask; extern const char *arithm_bitwise_not; - extern const char *arithm_compare_eq; - extern const char *arithm_compare_ne; + extern const char *arithm_compare; extern const char *arithm_transpose; extern const char *arithm_flip; extern const char *arithm_flip_rc; @@ -268,76 +267,55 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// ///////////////////////////////// compare /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) + +static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp, + string kernelName, const char **kernelString) { - dst.create(src1.size(), CV_8UC1); - CV_Assert(src1.oclchannels() == 1); CV_Assert(src1.type() == src2.type()); - Context *clCxt = src1.clCxt; + dst.create(src1.size(), CV_8UC1); + Context *clCxt = src1.clCxt; + int depth = src1.depth(); - int vector_lengths[7] = {4, 0, 4, 4, 4, 4, 4}; - size_t vector_length = vector_lengths[depth]; - int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, dst.rows, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + + int src1step1 = src1.step1(), src1offset1 = src1.offset / src1.elemSize1(); + int src2step1 = src2.step1(), src2offset1 = src2.offset / src2.elemSize1(); + int dststep1 = dst.step1(), dstoffset1 = dst.offset / dst.elemSize1(); + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; + std::string buildOptions = format("-D T=%s -D Operation=%s", typeMap[depth], operationMap[cmpOp]); - int dst_step1 = dst.cols * dst.elemSize(); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + + openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp) { - if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) { cout << "Selected device do not support double" << endl; return; } - string kernelName; - const char **kernelString = NULL; - switch( cmpOp ) - { - case CMP_EQ: - kernelName = "arithm_compare_eq"; - kernelString = &arithm_compare_eq; - break; - case CMP_GT: - kernelName = "arithm_compare_gt"; - kernelString = &arithm_compare_eq; - break; - case CMP_GE: - kernelName = "arithm_compare_ge"; - kernelString = &arithm_compare_eq; - break; - case CMP_NE: - kernelName = "arithm_compare_ne"; - kernelString = &arithm_compare_ne; - break; - case CMP_LT: - kernelName = "arithm_compare_lt"; - kernelString = &arithm_compare_ne; - break; - case CMP_LE: - kernelName = "arithm_compare_le"; - kernelString = &arithm_compare_ne; - break; - default: - CV_Error(CV_StsBadArg, "Unknown comparison method"); - } - compare_run(src1, src2, dst, kernelName, kernelString); + + CV_Assert(src1.channels() == 1 && src2.channels() == 1); + CV_Assert(cmpOp >= CMP_EQ && cmpOp <= CMP_NE); + + compare_run(src1, src2, dst, cmpOp, "arithm_compare", &arithm_compare); } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/opencl/arithm_compare.cl b/modules/ocl/src/opencl/arithm_compare.cl new file mode 100644 index 0000000..d0842db --- /dev/null +++ b/modules/ocl/src/opencl/arithm_compare.cl @@ -0,0 +1,74 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////addWeighted////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////// + +__kernel void arithm_compare(__global T * src1, int src1_step1, int src1_offset1, + __global T * src2, int src2_step1, int src2_offset1, + __global uchar * dst, int dst_step1, int dst_offset1, + int cols1, int rows) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols1 && y < rows) + { + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); + + dst[dst_index] = convert_uchar(src1[src1_index] Operation src2[src2_index] ? 255 : 0); + } +} diff --git a/modules/ocl/src/opencl/arithm_compare_eq.cl b/modules/ocl/src/opencl/arithm_compare_eq.cl deleted file mode 100644 index 16a56ac..0000000 --- a/modules/ocl/src/opencl/arithm_compare_eq.cl +++ /dev/null @@ -1,1016 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Jiang Liyuan, jlyuan001.good@163.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) -#pragma OPENCL EXTENSION cl_amd_fp64:enable -#endif -#endif - -////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////Compare EQ//////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////////////////// - -__kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); - int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data == src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - -__kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1)& 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index)); - ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data == src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - -__kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index)); - short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data == src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2) & 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); - int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - int4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - int4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data == src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2) & 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); - if(src2_index < 0) - { - float4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data == src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 3) & 3) - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix)); - double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - double4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - double4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data == src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} -#endif - -/***********************************Compare GT**************************/ -__kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); - int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data > src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index)); - ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data > src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index)); - short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data > src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2) & 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); - int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - int4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - int4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data > src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2) & 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - float4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - float4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data > src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 3) & 3) - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix)); - double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - double4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - double4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data > src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} -#endif - -/***********************************Compare GE**************************/ -__kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); - int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index)); - ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1)& 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index)); - short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2)& 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); - int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - int4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - int4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2)& 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - - float4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - float4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 3)& 3) - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix)); - double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - double4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - double4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} -#endif diff --git a/modules/ocl/src/opencl/arithm_compare_ne.cl b/modules/ocl/src/opencl/arithm_compare_ne.cl deleted file mode 100644 index fb5859d..0000000 --- a/modules/ocl/src/opencl/arithm_compare_ne.cl +++ /dev/null @@ -1,1013 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Jiang Liyuan, jlyuan001.good@163.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) -#pragma OPENCL EXTENSION cl_amd_fp64:enable -#endif -#endif -/***********************************Compare NE*******************************/ -__kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); - int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data != src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1)& 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index)); - ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data != src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1)& 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index)); - short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data != src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2)& 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); - int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - int4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - int4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data != src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2) & 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - float4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - float4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data != src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 3) & 3) - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix)); - double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - double4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - double4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data != src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} -#endif - - -/***********************************Compare LT*******************************/ -__kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); - int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index)); - ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index)); - short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2) & 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); - int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - int4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - int4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2) & 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - float4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - float4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 3) & 3) - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix)); - double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - double4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - double4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} -#endif - -/***********************************Compare LE*******************************/ -__kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); - int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data <= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index)); - ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data <= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - - - -__kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) - -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; - -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 1) & 3) - int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); - int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index)); - short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data <= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2)& 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); - int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); - if(src1_index < 0) - { - int4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - int4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data =convert_uchar4((src1_data <= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -__kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 2)& 3) - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - float4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - float4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data <= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} - -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if (x < cols && y < rows) - { - x = x << 2; -#ifdef dst_align -#undef dst_align -#endif -#define dst_align ((dst_offset >> 3)& 3) - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); - - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix)); - double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - double4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - double4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - - - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); - uchar4 tmp_data = convert_uchar4((src1_data <= src2_data)); - - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; - - *((__global uchar4 *)(dst + dst_index)) = dst_data; - } -} -#endif -- 2.7.4