--- /dev/null
+We greatly appreciate your support and contributions; they are always welcome!
+
+GitHub pull requests are the most convenient way to contribute to the OpenCV project. Good pull requests have all of these attributes:
+
+* Are scoped to one specific issue
+* Include a test to demonstrate the correctness
+* Update the docs if relevant
+* Match the [coding style guidelines](http://code.opencv.org/projects/opencv/wiki/CodingStyleGuide)
+* Are not cluttered with "oops" commits
+
+You can find more details about the contributing process at http://opencv.org/contribute.html
\ No newline at end of file
public class BinderConnector
{
- public BinderConnector(MarketConnector Market)
- {
- Init(Market);
+ public BinderConnector(MarketConnector Market) {
+ mMarket = Market;
+ }
+
+ public boolean Init() {
+ boolean result = false;
+ if (mIsReady)
+ result = Init(mMarket);
+
+ return result;
}
+
public native IBinder Connect();
+
public boolean Disconnect()
{
- Final();
- return true;
- }
+ if (mIsReady)
+ Final();
- static
- {
- System.loadLibrary("OpenCVEngine");
- System.loadLibrary("OpenCVEngine_jni");
+ return mIsReady;
}
private native boolean Init(MarketConnector Market);
- public native void Final();
+ private native void Final();
+ private static boolean mIsReady = false;
+ private MarketConnector mMarket;
+
+ static {
+ try {
+ System.loadLibrary("OpenCVEngine");
+ System.loadLibrary("OpenCVEngine_jni");
+ mIsReady = true;
+ }
+ catch(UnsatisfiedLinkError e) {
+ mIsReady = false;
+ e.printStackTrace();
+ }
+ }
+
}
public static native int DetectKnownPlatforms();
- static
- {
- System.loadLibrary("OpenCVEngine");
- System.loadLibrary("OpenCVEngine_jni");
+ public static boolean mIsReady = false;
+
+ static {
+ try {
+ System.loadLibrary("OpenCVEngine");
+ System.loadLibrary("OpenCVEngine_jni");
+ mIsReady = true;
+ }
+ catch(UnsatisfiedLinkError e) {
+ mIsReady = false;
+ e.printStackTrace();
+ }
}
}
import android.app.Service;
import android.content.Intent;
import android.os.IBinder;
+import android.os.RemoteException;
import android.util.Log;
-
public class OpenCVEngineService extends Service
{
private static final String TAG = "OpenCVEngine/Service";
- private IBinder mEngineInterface;
+ private IBinder mEngineInterface = null;
private MarketConnector mMarket;
private BinderConnector mNativeBinder;
- public void onCreate()
- {
+
+ public void onCreate() {
Log.i(TAG, "Service starting");
super.onCreate();
Log.i(TAG, "Engine binder component creating");
mMarket = new MarketConnector(getBaseContext());
mNativeBinder = new BinderConnector(mMarket);
- mEngineInterface = mNativeBinder.Connect();
- Log.i(TAG, "Service started successfully");
+ if (mNativeBinder.Init()) {
+ mEngineInterface = mNativeBinder.Connect();
+ Log.i(TAG, "Service started successfully");
+ } else {
+ Log.e(TAG, "Cannot initialize native part of OpenCV Manager!");
+ Log.e(TAG, "Using stub instead");
+
+ mEngineInterface = new OpenCVEngineInterface.Stub() {
+
+ @Override
+ public boolean installVersion(String version) throws RemoteException {
+ // TODO Auto-generated method stub
+ return false;
+ }
+
+ @Override
+ public String getLibraryList(String version) throws RemoteException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public String getLibPathByVersion(String version) throws RemoteException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public int getEngineVersion() throws RemoteException {
+ return -1;
+ }
+ };
+ }
}
- public IBinder onBind(Intent intent)
- {
+ public IBinder onBind(Intent intent) {
Log.i(TAG, "Service onBind called for intent " + intent.toString());
return mEngineInterface;
}
+
public boolean onUnbind(Intent intent)
{
Log.i(TAG, "Service onUnbind called for intent " + intent.toString());
@Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
+
+ if (!HardwareDetector.mIsReady) {
+ Log.e(TAG, "Cannot initialize native part of OpenCV Manager!");
+
+ AlertDialog dialog = new AlertDialog.Builder(this).create();
+
+ dialog.setTitle("OpenCV Manager Error");
+ dialog.setMessage("OpenCV Manager is incompatible with this device. Please replace it with an appropriate package.");
+ dialog.setCancelable(false);
+ dialog.setButton("OK", new DialogInterface.OnClickListener() {
+
+ public void onClick(DialogInterface dialog, int which) {
+ finish();
+ }
+ });
+
+ dialog.show();
+ return;
+ }
+
setContentView(R.layout.main);
TextView OsVersionView = (TextView)findViewById(R.id.OsVersionValue);
}
});
+ mPackageChangeReciever = new BroadcastReceiver() {
+
+ @Override
+ public void onReceive(Context context, Intent intent) {
+ Log.d("OpenCVManager/Reciever", "Bradcast message " + intent.getAction() + " reciever");
+ Log.d("OpenCVManager/Reciever", "Filling package list on broadcast message");
+ if (!bindService(new Intent("org.opencv.engine.BIND"), new OpenCVEngineServiceConnection(), Context.BIND_AUTO_CREATE))
+ {
+ TextView EngineVersionView = (TextView)findViewById(R.id.EngineVersionValue);
+ EngineVersionView.setText("not avaliable");
+ }
+ }
+ };
+
IntentFilter filter = new IntentFilter();
filter.addAction(Intent.ACTION_PACKAGE_ADDED);
filter.addAction(Intent.ACTION_PACKAGE_CHANGED);
@Override
protected void onDestroy() {
super.onDestroy();
- unregisterReceiver(mPackageChangeReciever);
+ if (mPackageChangeReciever != null)
+ unregisterReceiver(mPackageChangeReciever);
}
@Override
protected void onResume() {
super.onResume();
- Log.d(TAG, "Filling package list on resume");
- if (!bindService(new Intent("org.opencv.engine.BIND"), new OpenCVEngineServiceConnection(), Context.BIND_AUTO_CREATE))
- {
- TextView EngineVersionView = (TextView)findViewById(R.id.EngineVersionValue);
- EngineVersionView.setText("not avaliable");
+ if (HardwareDetector.mIsReady) {
+ Log.d(TAG, "Filling package list on resume");
+ OpenCVEngineServiceConnection connection = new OpenCVEngineServiceConnection();
+ if (!bindService(new Intent("org.opencv.engine.BIND"), connection, Context.BIND_AUTO_CREATE)) {
+ Log.e(TAG, "Cannot bind to OpenCV Manager service!");
+ TextView EngineVersionView = (TextView)findViewById(R.id.EngineVersionValue);
+ if (EngineVersionView != null)
+ EngineVersionView.setText("not avaliable");
+ unbindService(connection);
+ }
}
}
protected int ManagerApiLevel = 0;
protected String ManagerVersion;
- protected BroadcastReceiver mPackageChangeReciever = new BroadcastReceiver() {
-
- @Override
- public void onReceive(Context context, Intent intent) {
- Log.d("OpenCVManager/Reciever", "Bradcast message " + intent.getAction() + " reciever");
- Log.d("OpenCVManager/Reciever", "Filling package list on broadcast message");
- if (!bindService(new Intent("org.opencv.engine.BIND"), new OpenCVEngineServiceConnection(), Context.BIND_AUTO_CREATE))
- {
- TextView EngineVersionView = (TextView)findViewById(R.id.EngineVersionValue);
- EngineVersionView.setText("not avaliable");
- }
- }
- };
+ protected BroadcastReceiver mPackageChangeReciever = null;
protected class OpenCVEngineServiceConnection implements ServiceConnection
{
public void onServiceConnected(ComponentName name, IBinder service) {
OpenCVEngineInterface EngineService = OpenCVEngineInterface.Stub.asInterface(service);
+ if (EngineService == null) {
+ Log.e(TAG, "Cannot connect to OpenCV Manager Service!");
+ unbindService(this);
+ return;
+ }
+
try {
ManagerApiLevel = EngineService.getEngineVersion();
} catch (RemoteException e) {
set(OPENCL_INCLUDE_DIR "" CACHE STRING "OpenCL include directory")
mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
else(APPLE)
- find_package(OpenCL QUIET)
+ #find_package(OpenCL QUIET)
if (NOT OPENCL_FOUND)
find_path(OPENCL_ROOT_DIR
if(BUILD_DOCS)
find_host_program(SPHINX_BUILD sphinx-build)
if(SPHINX_BUILD)
- if(UNIX)
- execute_process(COMMAND sh -c "${SPHINX_BUILD} -_ 2>&1 | sed -ne 1p"
- RESULT_VARIABLE SPHINX_PROCESS
- OUTPUT_VARIABLE SPHINX_VERSION
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- else()
- execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import sphinx; print sphinx.__version__"
- RESULT_VARIABLE SPHINX_PROCESS
- OUTPUT_VARIABLE SPHINX_VERSION
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- endif()
- if(SPHINX_PROCESS EQUAL 0)
+ execute_process(COMMAND "${SPHINX_BUILD}"
+ OUTPUT_QUIET
+ ERROR_VARIABLE SPHINX_OUTPUT
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ if(SPHINX_OUTPUT MATCHES "^Sphinx v([0-9][^ \n]*)")
+ set(SPHINX_VERSION "${CMAKE_MATCH_1}")
set(HAVE_SPHINX 1)
message(STATUS "Found Sphinx ${SPHINX_VERSION}: ${SPHINX_BUILD}")
endif()
file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
- file(GLOB cl_kernels "src/opencl/*.cl")
+ file(GLOB lib_cuda_srcs "src/cuda/*.cu")
+ set(cuda_objs "")
+ set(lib_cuda_hdrs "")
- source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
- source_group("Include" FILES ${lib_hdrs})
- source_group("Include\\detail" FILES ${lib_hdrs_detail})
+ if(HAVE_CUDA AND lib_cuda_srcs)
+ ocv_include_directories(${CUDA_INCLUDE_DIRS})
+ file(GLOB lib_cuda_hdrs "src/cuda/*.hpp")
+
+ ocv_cuda_compile(cuda_objs ${lib_cuda_srcs} ${lib_cuda_hdrs})
+ source_group("Src\\Cuda" FILES ${lib_cuda_srcs} ${lib_cuda_hdrs})
+ endif()
+
+ file(GLOB cl_kernels "src/opencl/*.cl")
if(HAVE_OPENCL AND cl_kernels)
ocv_include_directories(${OPENCL_INCLUDE_DIRS})
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
endif()
- ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail} SOURCES ${lib_srcs} ${lib_int_hdrs})
+ source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
+ source_group("Include" FILES ${lib_hdrs})
+ source_group("Include\\detail" FILES ${lib_hdrs_detail})
+
+ ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
+ SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_srcs} ${lib_cuda_hdrs})
endmacro()
# creates OpenCV module in current folder
if(NOT "${ARGN}" STREQUAL "SKIP_LINK")
target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
+ if (HAVE_CUDA)
+ target_link_libraries(${the_module} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
+ endif()
if(HAVE_OPENCL AND OPENCL_LIBRARIES)
target_link_libraries(${the_module} ${OPENCL_LIBRARIES})
endif()
# ocv_define_module(module_name [INTERNAL] [REQUIRED] [<list of dependencies>] [OPTIONAL <list of optional dependencies>])
macro(ocv_define_module module_name)
ocv_add_module(${module_name} ${ARGN})
- ocv_glob_module_sources()
ocv_module_include_directories()
+ ocv_glob_module_sources()
ocv_create_module()
ocv_add_precompiled_headers(${the_module})
-#/usr/bin/env python
+#!/usr/bin/env python
import sys, glob
-#/usr/bin/env python
+#!/usr/bin/env python
import os, sys, fnmatch, re
-#/usr/bin/env python
+#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
-#/usr/bin/env python
+#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
ocv domain, a modified copy of sphinx.domains.cpp + shpinx.domains.python.
-#/usr/bin/env python
+#!/usr/bin/env python
import sys
-#/usr/bin/env python
+#!/usr/bin/env python
"""gen_pattern.py
To run:
-#/usr/bin/env python
+#!/usr/bin/env python
# svgfig.py copyright (C) 2008 Jim Pivarski <jpivarski@gmail.com>
#
-#/usr/bin/env python
+#!/usr/bin/env python
import os, sys, re
-#/usr/bin/env python
+#!/usr/bin/env python
import sys
import os.path
ocv_module_include_directories(${ZLIB_INCLUDE_DIR})
if(HAVE_CUDA)
- ocv_source_group("Src\\Cuda" GLOB "src/cuda/*.cu")
- ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include" ${CUDA_INCLUDE_DIRS})
+ ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
+endif()
- file(GLOB lib_cuda "src/cuda/*.cu")
- ocv_cuda_compile(cuda_objs ${lib_cuda})
+file(GLOB lib_cuda_hdrs "include/opencv2/${name}/cuda/*.hpp" "include/opencv2/${name}/cuda/*.h")
+file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "include/opencv2/${name}/cuda/detail/*.h")
- set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
-else()
- set(lib_cuda "")
- set(cuda_objs "")
- set(cuda_link_libs "")
-endif()
+source_group("Cuda Headers" FILES ${lib_cuda_hdrs})
+source_group("Cuda Headers\\Detail" FILES ${lib_cuda_hdrs_detail})
-ocv_glob_module_sources(SOURCES ${lib_cuda} ${cuda_objs} "${opencv_core_BINARY_DIR}/version_string.inc")
+ocv_glob_module_sources(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
+ HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
-ocv_create_module(${cuda_link_libs})
+ocv_create_module()
ocv_add_precompiled_headers(${the_module})
ocv_add_accuracy_tests()
ocv_add_perf_tests()
-
#define CV_DbgAssert(expr)
#endif
+CV_EXPORTS void glob(String pattern, std::vector<String>& result, bool recursive = false);
+
CV_EXPORTS void setNumThreads(int nthreads);
CV_EXPORTS int getNumThreads();
CV_EXPORTS int getThreadNum();
uint64 state;
};
+/*!
+   Random Number Generator - MT
+
+   The class implements RNG using the Mersenne Twister algorithm
+   (MT19937: 624-word state, period 2^19937 - 1).
+*/
+class CV_EXPORTS RNG_MT19937
+{
+public:
+    RNG_MT19937();
+    RNG_MT19937(unsigned s);
+    //! re-initializes the generator state from a 32-bit seed
+    void seed(unsigned s);
+
+    //! returns the next 32-bit pseudo-random word
+    unsigned next();
+
+    operator int();
+    operator unsigned();
+    operator float();
+    operator double();
+
+    //! returns a random integer from [0, N) range
+    unsigned operator ()(unsigned N);
+    //! returns the next 32-bit word (same as next())
+    unsigned operator ()();
+
+    //! returns uniformly distributed integer random number from [a,b) range
+    int uniform(int a, int b);
+    //! returns uniformly distributed floating-point random number from [a,b) range
+    float uniform(float a, float b);
+    //! returns uniformly distributed double-precision floating-point random number from [a,b) range
+    double uniform(double a, double b);
+
+private:
+    enum PeriodParameters {N = 624, M = 397};
+    // N-word state array plus the index of the next word to emit
+    unsigned state[N];
+    int mti;
+};
/*!
Termination criteria in iterative algorithms
Scalar operator * (const Matx<_Tp, 4, 4>& a, const Scalar& b)
{
Matx<double, 4, 1> c(Matx<double, 4, 4>(a), b, Matx_MatMulOp());
- return reinterpret_cast<const Scalar&>(c);
+ return static_cast<const Scalar&>(c);
}
Scalar operator * (const Matx<double, 4, 4>& a, const Scalar& b)
{
Matx<double, 4, 1> c(a, b, Matx_MatMulOp());
- return reinterpret_cast<const Scalar&>(c);
+ return static_cast<const Scalar&>(c);
}
testing::Combine(
testing::Values(::perf::szVGA, ::perf::sz1080p),
testing::Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1),
- testing::ValuesIn(CmpType::all())
+ CmpType::all()
)
)
{
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
testing::Values(TYPICAL_MAT_TYPES),
- testing::ValuesIn(CmpType::all())
+ CmpType::all()
)
)
{
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
testing::Values(TYPICAL_MAT_TYPES),
- testing::ValuesIn(ROp::all())
+ ROp::all()
)
)
{
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
testing::Values(TYPICAL_MAT_TYPES),
- testing::ValuesIn(ROp::all())
+ ROp::all()
)
)
{
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and / or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#if defined WIN32 || defined _WIN32 || defined WINCE
+# include <windows.h>
+const char dir_separators[] = "/\\";
+const char native_separator = '\\';
+
+// Minimal Win32 emulation of the POSIX dirent API (opendir/readdir/closedir),
+// built on FindFirstFileA/FindNextFile so the directory-walking code below
+// can be written once for both platforms.
+namespace
+{
+    struct dirent
+    {
+        const char* d_name;
+    };
+
+    struct DIR
+    {
+        WIN32_FIND_DATA data;
+        HANDLE handle;
+        dirent ent;
+    };
+
+    // Opens 'path' for enumeration; returns 0 on failure (POSIX-style).
+    DIR* opendir(const char* path)
+    {
+        DIR* dir = new DIR;
+        dir->ent.d_name = 0;
+        dir->handle = ::FindFirstFileA((cv::String(path) + "\\*").c_str(), &dir->data);
+        if(dir->handle == INVALID_HANDLE_VALUE)
+        {
+            /*closedir will do all cleanup*/
+            // NOTE(review): 'dir' is leaked on this failure path — the caller
+            // receives 0 and cannot call closedir on it; confirm and fix upstream.
+            return 0;
+        }
+        return dir;
+    }
+
+    // First call returns the entry already fetched by FindFirstFileA;
+    // subsequent calls advance with FindNextFile. Returns 0 when exhausted.
+    dirent* readdir(DIR* dir)
+    {
+        if (dir->ent.d_name != 0)
+        {
+            if (::FindNextFile(dir->handle, &dir->data) != TRUE)
+                return 0;
+        }
+        dir->ent.d_name = dir->data.cFileName;
+        return &dir->ent;
+    }
+
+    // Releases the find handle and the DIR object.
+    void closedir(DIR* dir)
+    {
+        ::FindClose(dir->handle);
+        delete dir;
+    }
+
+
+}
+#else
+# include <dirent.h>
+# include <sys/stat.h>
+const char dir_separators[] = "/";
+const char native_separator = '/';
+#endif
+
+// Returns true if 'path' refers to an existing directory.
+// On Windows, when a DIR handle from an in-progress scan is supplied, its
+// cached find-data attributes are reused instead of querying the filesystem
+// again; on POSIX 'dir' is ignored and stat() is used.
+static bool isDir(const cv::String& path, DIR* dir)
+{
+#if defined WIN32 || defined _WIN32 || defined WINCE
+    DWORD attributes;
+    if (dir)
+        attributes = dir->data.dwFileAttributes;
+    else
+        attributes = ::GetFileAttributes(path.c_str());
+
+    return (attributes != INVALID_FILE_ATTRIBUTES) && ((attributes & FILE_ATTRIBUTE_DIRECTORY) != 0);
+#else
+    (void)dir;  // handle only needed on the Windows branch
+    struct stat stat_buf;
+    if (0 != stat( path.c_str(), &stat_buf))
+        return false;
+    int is_dir = S_ISDIR( stat_buf.st_mode);
+    return is_dir != 0;
+#endif
+}
+
+// Shell-style wildcard match: returns true if 'string' matches pattern 'wild',
+// where '*' matches any (possibly empty) run of characters and '?' matches
+// exactly one character. Matching is case-sensitive.
+static bool wildcmp(const char *string, const char *wild)
+{
+    // Based on wildcmp written by Jack Handy - <A href="mailto:jakkhandy@hotmail.com">jakkhandy@hotmail.com</A>
+    // cp/mp remember where to restart after the most recent '*'.
+    const char *cp = 0, *mp = 0;
+
+    // Consume the literal prefix before the first '*'.
+    while ((*string) && (*wild != '*'))
+    {
+        if ((*wild != *string) && (*wild != '?'))
+        {
+            return false;
+        }
+
+        wild++;
+        string++;
+    }
+
+    while (*string)
+    {
+        if (*wild == '*')
+        {
+            // Trailing '*' matches everything that remains.
+            if (!*++wild)
+            {
+                return true;
+            }
+
+            mp = wild;
+            cp = string + 1;
+        }
+        else if ((*wild == *string) || (*wild == '?'))
+        {
+            wild++;
+            string++;
+        }
+        else
+        {
+            // Mismatch: backtrack, letting the last '*' absorb one more char.
+            wild = mp;
+            string = cp++;
+        }
+    }
+
+    // Skip any trailing '*'s; match succeeds only if the pattern is exhausted.
+    while (*wild == '*')
+    {
+        wild++;
+    }
+
+    return *wild == 0;
+}
+
+// Scans 'directory' and appends every regular entry whose name matches
+// 'wildchart' (empty pattern matches everything) to 'result'; descends into
+// subdirectories when 'recursive' is true. Raises CV_StsObjectNotFound if
+// the directory cannot be opened.
+static void glob_rec(const cv::String& directory, const cv::String& wildchart, std::vector<cv::String>& result, bool recursive)
+{
+    DIR *dir;
+    struct dirent *ent;
+
+    if ((dir = opendir (directory.c_str())) != 0)
+    {
+        /* find all the files and directories within directory */
+        try
+        {
+            while ((ent = readdir (dir)) != 0)
+            {
+                const char* name = ent->d_name;
+                // skip "", "." and ".." pseudo-entries
+                if((name[0] == 0) || (name[0] == '.' && name[1] == 0) || (name[0] == '.' && name[1] == '.' && name[2] == 0))
+                    continue;
+
+                cv::String path = directory + native_separator + name;
+
+                if (isDir(path, dir))
+                {
+                    if (recursive)
+                        glob_rec(path, wildchart, result, recursive);
+                }
+                else
+                {
+                    if (wildchart.empty() || wildcmp(name, wildchart.c_str()))
+                        result.push_back(path);
+                }
+            }
+        }
+        catch (...)
+        {
+            // ensure the handle is released even if push_back/recursion throws
+            closedir(dir);
+            throw;
+        }
+        closedir(dir);
+    }
+    else CV_Error(CV_StsObjectNotFound, cv::format("could not open directory: %s", directory.c_str()));
+}
+
+// Public entry point: splits 'pattern' into a directory part and a wildcard
+// part, collects all matching files (recursively when requested) and returns
+// them sorted lexicographically in 'result'.
+void cv::glob(String pattern, std::vector<String>& result, bool recursive)
+{
+    result.clear();
+    String path, wildchart;
+
+    if (isDir(pattern, 0))
+    {
+        // Pattern names a directory: list everything inside it
+        // (strip a trailing separator if present).
+        if(strchr(dir_separators, pattern[pattern.size() - 1]) != 0)
+        {
+            path = pattern.substr(0, pattern.size() - 1);
+        }
+        else
+        {
+            path = pattern;
+        }
+    }
+    else
+    {
+        size_t pos = pattern.find_last_of(dir_separators);
+        if (pos == String::npos)
+        {
+            // No separator: search the current directory.
+            wildchart = pattern;
+            path = ".";
+        }
+        else
+        {
+            path = pattern.substr(0, pos);
+            wildchart = pattern.substr(pos + 1);
+        }
+    }
+
+    glob_rec(path, wildchart, result, recursive);
+    std::sort(result.begin(), result.end());
+}
cv::randShuffle( dst, iter_factor, &rng );
}
+// Mersenne Twister random number generator.
+// Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
+
+/*
+ A C-program for MT19937, with initialization improved 2002/1/26.
+ Coded by Takuji Nishimura and Makoto Matsumoto.
+
+ Before using, initialize the state by using init_genrand(seed)
+ or init_by_array(init_key, key_length).
+
+ Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. The names of its contributors may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+ Any feedback is very welcome.
+ http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+ email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+*/
+
+// Construct with an explicit 32-bit seed.
+cv::RNG_MT19937::RNG_MT19937(unsigned s) { seed(s); }
+
+// Default-construct with the reference implementation's canonical seed.
+cv::RNG_MT19937::RNG_MT19937() { seed(5489U); }
+
+// Re-initialize the full N-word state array from a single 32-bit seed.
+void cv::RNG_MT19937::seed(unsigned s)
+{
+    state[0]= s;
+    for (mti = 1; mti < N; mti++)
+    {
+        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+        state[mti] = (1812433253U * (state[mti - 1] ^ (state[mti - 1] >> 30)) + mti);
+    }
+}
+
+// Returns the next 32-bit pseudo-random word. When the current block of N
+// words is exhausted (mti >= N) the whole state is regenerated in one pass
+// using the MT19937 twist recurrence, then words are emitted with tempering.
+unsigned cv::RNG_MT19937::next()
+{
+    /* mag01[x] = x * MATRIX_A for x=0,1 */
+    static unsigned mag01[2] = { 0x0U, /*MATRIX_A*/ 0x9908b0dfU};
+
+    const unsigned UPPER_MASK = 0x80000000U;
+    const unsigned LOWER_MASK = 0x7fffffffU;
+
+    /* generate N words at one time */
+    if (mti >= N)
+    {
+        int kk = 0;
+
+        for (; kk < N - M; ++kk)
+        {
+            unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
+            state[kk] = state[kk + M] ^ (y >> 1) ^ mag01[y & 0x1U];
+        }
+
+        for (; kk < N - 1; ++kk)
+        {
+            unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
+            state[kk] = state[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1U];
+        }
+
+        // wrap-around word combines the last and first state entries
+        unsigned y = (state[N - 1] & UPPER_MASK) | (state[0] & LOWER_MASK);
+        state[N - 1] = state[M - 1] ^ (y >> 1) ^ mag01[y & 0x1U];
+
+        mti = 0;
+    }
+
+    unsigned y = state[mti++];
+
+    /* Tempering */
+    y ^= (y >> 11);
+    y ^= (y << 7) & 0x9d2c5680U;
+    y ^= (y << 15) & 0xefc60000U;
+    y ^= (y >> 18);
+
+    return y;
+}
+
+cv::RNG_MT19937::operator unsigned() { return next(); }
+
+cv::RNG_MT19937::operator int() { return (int)next();}
+
+// Single-precision value in [0, 1) built from one 32-bit draw.
+cv::RNG_MT19937::operator float() { return next() * (1.f / 4294967296.f); }
+
+// Double-precision value in [0, 1) assembled from 53 random bits (two draws),
+// matching the reference genrand_res53 construction.
+cv::RNG_MT19937::operator double()
+{
+    unsigned a = next() >> 5;
+    unsigned b = next() >> 6;
+    return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
+}
+
+// NOTE(review): modulo mapping has a slight bias when (b - a) does not divide 2^32.
+int cv::RNG_MT19937::uniform(int a, int b) { return (int)(next() % (b - a) + a); }
+
+float cv::RNG_MT19937::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }
+
+double cv::RNG_MT19937::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
+
+unsigned cv::RNG_MT19937::operator ()(unsigned b) { return next() % b; }
+
+unsigned cv::RNG_MT19937::operator ()() { return next(); }
+
/* End of file. */
TEST(Core_InputOutput, huge) { CV_BigMatrixIOTest test; test.safe_run(); }
*/
+// Sanity test for cv::glob: the recursive search for "lena*.*" must return a
+// superset of the files matched by the more specific "lena.png" pattern.
+TEST(Core_globbing, accuracy)
+{
+    std::string patternLena = cvtest::TS::ptr()->get_data_path() + "lena*.*";
+    std::string patternLenaPng = cvtest::TS::ptr()->get_data_path() + "lena.png";
+
+    std::vector<String> lenas, pngLenas;
+    cv::glob(patternLena, lenas, true);
+    cv::glob(patternLenaPng, pngLenas, true);
+
+    ASSERT_GT(lenas.size(), pngLenas.size());
+
+    // every exact-name match must appear in the wildcard result set
+    for (size_t i = 0; i < pngLenas.size(); ++i)
+    {
+        ASSERT_NE(std::find(lenas.begin(), lenas.end(), pngLenas[i]), lenas.end());
+    }
+}
+
TEST(Core_InputOutput, FileStorage)
{
std::string file = cv::tempfile(".xml");
TEST(Core_Rand, range) { Core_RandRangeTest test; test.safe_run(); }
+
+// Regression test: buckets 10000 default-seeded MT19937 draws into 61 bins
+// and compares against recorded counts, so any change to the generated
+// stream (seeding, twist, or tempering) is detected.
+TEST(Core_RNG_MT19937, regression)
+{
+    cv::RNG_MT19937 rng;
+    int actual[61] = {0, };
+    const size_t length = (sizeof(actual) / sizeof(actual[0]));
+    for (int i = 0; i < 10000; ++i )
+    {
+        actual[(unsigned)(rng.next() ^ i) % length]++;
+    }
+
+    // reference histogram captured from the original implementation
+    int expected[length] = {
+        177, 158, 180, 177, 160, 179, 143, 162,
+        177, 144, 170, 174, 165, 168, 168, 156,
+        177, 157, 159, 169, 177, 182, 166, 154,
+        144, 180, 168, 152, 170, 187, 160, 145,
+        139, 164, 157, 179, 148, 183, 159, 160,
+        196, 184, 149, 142, 162, 148, 163, 152,
+        168, 173, 160, 181, 172, 181, 155, 153,
+        158, 171, 138, 150, 150 };
+
+    for (size_t i = 0; i < length; ++i)
+    {
+        ASSERT_EQ(expected[i], actual[i]);
+    }
+}
PERF_TEST_P(fast, detect, testing::Combine(
testing::Values(FAST_IMAGES),
- testing::ValuesIn(FastType::all())
+ FastType::all()
))
{
String filename = getDataPath(get<0>(GetParam()));
-#/usr/bin/env python
+#!/usr/bin/env python
import sys, re
//////////////////////////////////////////////////////////////////////
// CompareMat
-CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
-#define ALL_CMP_CODES ValuesIn(CmpCode::all())
+CV_ENUM(CmpCode, CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE)
DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CmpCode);
PERF_TEST_P(Sz_Depth_Code, Core_CompareMat,
Combine(GPU_TYPICAL_MAT_SIZES,
ARITHM_MAT_DEPTH,
- ALL_CMP_CODES))
+ CmpCode::all()))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar,
Combine(GPU_TYPICAL_MAT_SIZES,
ARITHM_MAT_DEPTH,
- ALL_CMP_CODES))
+ CmpCode::all()))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
//////////////////////////////////////////////////////////////////////
// GEMM
-CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
+CV_FLAGS(GemmFlags, 0, GEMM_1_T, GEMM_2_T, GEMM_3_T)
#define ALL_GEMM_FLAGS Values(0, CV_GEMM_A_T, CV_GEMM_B_T, CV_GEMM_C_T, CV_GEMM_A_T | CV_GEMM_B_T, CV_GEMM_A_T | CV_GEMM_C_T, CV_GEMM_A_T | CV_GEMM_B_T | CV_GEMM_C_T)
DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags);
enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
-#define ALL_FLIP_CODES ValuesIn(FlipCode::all())
DEF_PARAM_TEST(Sz_Depth_Cn_Code, cv::Size, MatDepth, MatCn, FlipCode);
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16U, CV_32F),
GPU_CHANNELS_1_3_4,
- ALL_FLIP_CODES))
+ FlipCode::all()))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
//////////////////////////////////////////////////////////////////////
// Reduce
-CV_ENUM(ReduceCode, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
-#define ALL_REDUCE_CODES ValuesIn(ReduceCode::all())
-
enum {Rows = 0, Cols = 1};
+CV_ENUM(ReduceCode, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
CV_ENUM(ReduceDim, Rows, Cols)
-#define ALL_REDUCE_DIMS ValuesIn(ReduceDim::all())
DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim);
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16U, CV_16S, CV_32F),
Values(1, 2, 3, 4),
- ALL_REDUCE_CODES,
- ALL_REDUCE_DIMS))
+ ReduceCode::all(),
+ ReduceDim::all()))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
//////////////////////////////////////////////////////////////////////
// MorphologyEx
-CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
-#define ALL_MORPH_OPS ValuesIn(MorphOp::all())
+CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT)
DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);
-PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), ALL_MORPH_OPS))
+PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), MorphOp::all()))
{
declare.time(20.0);
enum { HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH };
CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH);
-#define ALL_REMAP_MODES ValuesIn(RemapMode::all())
void generateMap(cv::Mat& map_x, cv::Mat& map_y, int remapMode)
{
GPU_CHANNELS_1_3_4,
Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
ALL_BORDER_MODES,
- ALL_REMAP_MODES))
+ RemapMode::all()))
{
declare.time(20.0);
//////////////////////////////////////////////////////////////////////
// Threshold
-CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
-#define ALL_THRESH_OPS ValuesIn(ThreshOp::all())
+CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp);
PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16U, CV_32F, CV_64F),
- ALL_THRESH_OPS))
+ ThreshOp::all()))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate8U
-CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
-#define ALL_TEMPLATE_METHODS ValuesIn(TemplateMethod::all())
+CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
DEF_PARAM_TEST(Sz_TemplateSz_Cn_Method, cv::Size, cv::Size, MatCn, TemplateMethod);
Combine(GPU_TYPICAL_MAT_SIZES,
Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)),
GPU_CHANNELS_1_3_4,
- ALL_TEMPLATE_METHODS))
+ TemplateMethod::all()))
{
declare.time(300.0);
//////////////////////////////////////////////////////////////////////
// MulSpectrums
-CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+CV_FLAGS(DftFlags, 0, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT)
DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags);
}
CV_ENUM(DemosaicingCode,
- cv::COLOR_BayerBG2BGR, cv::COLOR_BayerGB2BGR, cv::COLOR_BayerRG2BGR, cv::COLOR_BayerGR2BGR,
- cv::COLOR_BayerBG2GRAY, cv::COLOR_BayerGB2GRAY, cv::COLOR_BayerRG2GRAY, cv::COLOR_BayerGR2GRAY,
- cv::gpu::COLOR_BayerBG2BGR_MHT, cv::gpu::COLOR_BayerGB2BGR_MHT, cv::gpu::COLOR_BayerRG2BGR_MHT, cv::gpu::COLOR_BayerGR2BGR_MHT,
- cv::gpu::COLOR_BayerBG2GRAY_MHT, cv::gpu::COLOR_BayerGB2GRAY_MHT, cv::gpu::COLOR_BayerRG2GRAY_MHT, cv::gpu::COLOR_BayerGR2GRAY_MHT)
+ COLOR_BayerBG2BGR, COLOR_BayerGB2BGR, COLOR_BayerRG2BGR, COLOR_BayerGR2BGR,
+ COLOR_BayerBG2GRAY, COLOR_BayerGB2GRAY, COLOR_BayerRG2GRAY, COLOR_BayerGR2GRAY,
+ COLOR_BayerBG2BGR_MHT, COLOR_BayerGB2BGR_MHT, COLOR_BayerRG2BGR_MHT, COLOR_BayerGR2BGR_MHT,
+ COLOR_BayerBG2GRAY_MHT, COLOR_BayerGB2GRAY_MHT, COLOR_BayerRG2GRAY_MHT, COLOR_BayerGR2GRAY_MHT)
DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode);
PERF_TEST_P(Sz_Code, ImgProc_Demosaicing,
Combine(GPU_TYPICAL_MAT_SIZES,
- ValuesIn(DemosaicingCode::all())))
+ DemosaicingCode::all()))
{
const cv::Size size = GET_PARAM(0);
const int code = GET_PARAM(1);
//////////////////////////////////////////////////////////////////////
// AlphaComp
-CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
-#define ALL_ALPHA_OPS ValuesIn(AlphaOp::all())
+CV_ENUM(AlphaOp, ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL, ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL)
DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, AlphaOp);
PERF_TEST_P(Sz_Type_Op, ImgProc_AlphaComp,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
- ALL_ALPHA_OPS))
+ AlphaOp::all()))
{
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
//////////////////////////////////////////////////////////////////////
// GeneralizedHough
-CV_FLAGS(GHMethod, cv::GHT_POSITION, cv::GHT_SCALE, cv::GHT_ROTATION);
+CV_FLAGS(GHMethod, GHT_POSITION, GHT_SCALE, GHT_ROTATION);
DEF_PARAM_TEST(Method_Sz, GHMethod, cv::Size);
////////////////////////////////////////////////////////////////////////////////
// Compare_Array
-CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
-#define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
+CV_ENUM(CmpCode, CMP_EQ, CMP_NE, CMP_GT, CMP_GE, CMP_LT, CMP_LE)
PARAM_TEST_CASE(Compare_Array, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode, UseRoi)
{
ALL_DEVICES,
DIFFERENT_SIZES,
ALL_DEPTH,
- ALL_CMP_CODES,
+ CmpCode::all(),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
ALL_DEVICES,
DIFFERENT_SIZES,
TYPES(CV_8U, CV_64F, 1, 4),
- ALL_CMP_CODES,
+ CmpCode::all(),
WHOLE_SUBMAT));
//////////////////////////////////////////////////////////////////////////////
#ifdef HAVE_CUBLAS
-CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
+CV_FLAGS(GemmFlags, 0, GEMM_1_T, GEMM_2_T, GEMM_3_T);
#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
PARAM_TEST_CASE(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
}
-CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)
+CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
PARAM_TEST_CASE(ORB, cv::gpu::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
{
/////////////////////////////////////////////////////////////////////////////////////////////////
// MorphEx
-CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
-#define ALL_MORPH_OPS testing::Values(MorphOp(cv::MORPH_OPEN), MorphOp(cv::MORPH_CLOSE), MorphOp(cv::MORPH_GRADIENT), MorphOp(cv::MORPH_TOPHAT), MorphOp(cv::MORPH_BLACKHAT))
+CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT)
PARAM_TEST_CASE(MorphEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp, Anchor, Iterations, UseRoi)
{
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
- ALL_MORPH_OPS,
+ MorphOp::all(),
testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))),
testing::Values(Iterations(1), Iterations(2), Iterations(3)),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate8U
-CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
-#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_CCOEFF_NORMED))
+CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
namespace
{
DIFFERENT_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)),
- ALL_TEMPLATE_METHODS));
+ TemplateMethod::all()));
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate32F
////////////////////////////////////////////////////////////////////////////
// MulSpectrums
-CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+CV_FLAGS(DftFlags, 0, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT)
PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
{
{
IMPLEMENT_PARAM_CLASS(PyrScale, double)
IMPLEMENT_PARAM_CLASS(PolyN, int)
- CV_FLAGS(FarnebackOptFlowFlags, 0, cv::OPTFLOW_FARNEBACK_GAUSSIAN)
+ CV_FLAGS(FarnebackOptFlowFlags, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
IMPLEMENT_PARAM_CLASS(UseInitFlow, bool)
}
using namespace cvtest;
-CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
-#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
+CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
{
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)),
- ALL_THRESH_OPS,
+ ThreshOp::all(),
WHOLE_SUBMAT));
#endif // HAVE_CUDA
- USE_TEMP_BUFFER fixes the main problem (improper buffer management) and
prevents bad images in the first place
+11th patch: April 2, 2013, Forrest Reiling forrest.reiling@gmail.com
+Added v4l2 support for getting capture property CV_CAP_PROP_POS_MSEC.
+Returns the millisecond timestamp of the last frame grabbed or 0 if no frames have been grabbed
+Used to successfully synchronize 2 Logitech C310 USB webcams to within 16 ms of one another
+
make & enjoy!
struct v4l2_queryctrl queryctrl;
struct v4l2_querymenu querymenu;
+ struct timeval timestamp;
+
/* V4L2 control variables */
int v4l2_brightness, v4l2_brightness_min, v4l2_brightness_max;
int v4l2_contrast, v4l2_contrast_min, v4l2_contrast_max;
capture->v4l2_gain_max = 0;
capture->v4l2_exposure_max = 0;
+ capture->timestamp.tv_sec = 0;
+ capture->timestamp.tv_usec = 0;
+
/* Scan V4L2 controls */
v4l2_scan_controls(capture);
if (-1 == ioctl (capture->deviceHandle, VIDIOC_QBUF, &buf))
perror ("VIDIOC_QBUF");
+ //set timestamp in capture struct to be timestamp of most recent frame
+ capture->timestamp = buf.timestamp;
+
return 1;
}
/* initialize the control structure */
switch (property_id) {
+ case CV_CAP_PROP_POS_MSEC:
+ if (capture->FirstCapture) {
+ return 0;
+ } else {
+ return 1000 * capture->timestamp.tv_sec + ((double) capture->timestamp.tv_usec) / 1000;
+ }
+ break;
case CV_CAP_PROP_BRIGHTNESS:
capture->control.id = V4L2_CID_BRIGHTNESS;
break;
Combine(
Values( szVGA, sz1080p ), // image size
Values( 3, 5 ), // d
- ValuesIn( Mat_Type::all() ) // image type
+ Mat_Type::all() // image type
)
)
{
testing::Combine(
testing::Values(szODD, szQVGA, szVGA, sz720p),
testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_16SC1, CV_32FC1),
- testing::ValuesIn(BorderType3x3::all())
+ BorderType3x3::all()
)
)
{
testing::Combine(
testing::Values(szODD, szQVGA, szVGA, sz720p),
testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_16SC1, CV_32FC1),
- testing::ValuesIn(BorderType3x3::all())
+ BorderType3x3::all()
)
)
{
testing::Combine(
testing::Values(szVGA, sz720p),
testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_16SC1, CV_32FC1),
- testing::ValuesIn(BorderType::all())
+ BorderType::all()
)
)
{
testing::Combine(
testing::Values(szODD, szQVGA, szVGA, sz720p),
testing::Values(CV_8UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_32FC3),
- testing::ValuesIn(BorderType3x3::all())
+ BorderType3x3::all()
)
)
{
testing::Combine(
testing::Values(szODD, szQVGA, szVGA, sz720p),
testing::Values(CV_8UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_32FC3),
- testing::ValuesIn(BorderType3x3::all())
+ BorderType3x3::all()
)
)
{
testing::Combine(
testing::Values(szODD, szQVGA, szVGA, sz720p),
testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_16SC1, CV_32FC1),
- testing::ValuesIn(BorderType::all())
+ BorderType::all()
)
)
{
testing::Combine(
testing::Values(szVGA, sz720p),
testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_16SC1, CV_32FC1, CV_32FC3),
- testing::ValuesIn(BorderType::all())
+ BorderType::all()
)
)
{
testing::Values( "stitching/a1.png", "cv/shared/pic5.png"),
testing::Values( 3, 5 ),
testing::Values( 3, 5 ),
- testing::ValuesIn(BorderType::all())
+ BorderType::all()
)
)
{
testing::Values( 3, 5 ),
testing::Values( 3, 5 ),
testing::Values( 0.04, 0.1 ),
- testing::ValuesIn(BorderType::all())
+ BorderType::all()
)
)
{
PERF_TEST_P(Size_CvtMode, cvtColor8u,
testing::Combine(
testing::Values(::perf::szODD, ::perf::szVGA, ::perf::sz1080p),
- testing::ValuesIn(CvtMode::all())
+ CvtMode::all()
)
)
{
PERF_TEST_P(Size_CvtMode_Bayer, cvtColorBayer8u,
testing::Combine(
testing::Values(::perf::szODD, ::perf::szVGA),
- testing::ValuesIn(CvtModeBayer::all())
+ CvtModeBayer::all()
)
)
{
PERF_TEST_P(Size_CvtMode2, cvtColorYUV420,
testing::Combine(
testing::Values(szVGA, sz1080p, Size(130, 60)),
- testing::ValuesIn(CvtMode2::all())
+ CvtMode2::all()
)
)
{
PERF_TEST_P(Size_CvtMode3, cvtColorRGB2YUV420p,
testing::Combine(
testing::Values(szVGA, sz720p, sz1080p, Size(130, 60)),
- testing::ValuesIn(CvtMode3::all())
+ CvtMode3::all()
)
)
{
Combine(
Values( Size(320, 240), sz1080p ),
Values( 3, 5 ),
- ValuesIn( BorderMode::all() )
+ BorderMode::all()
)
)
{
cv::Size(1024, 768), cv::Size(1280, 1024)),
testing::Values(cv::Size(12, 12), cv::Size(28, 9),
cv::Size(8, 30), cv::Size(16, 16)),
- testing::ValuesIn(MethodType::all())
+ MethodType::all()
)
)
{
testing::Combine(
testing::Values(cv::Size(1280, 1024)),
testing::Values(cv::Size(1260, 1000), cv::Size(1261, 1013)),
- testing::ValuesIn(MethodType::all())
+ MethodType::all()
)
)
{
Values( szVGA, sz1080p ),
Values( CV_16UC1, CV_16SC1, CV_32FC1 ),
Values( CV_16SC2, CV_32FC1, CV_32FC2 ),
- ValuesIn( InterType::all() )
+ InterType::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0), make_tuple(1, 1), make_tuple(0, 2), make_tuple(2, 0), make_tuple(2, 2)),
- testing::ValuesIn(BorderType3x3::all())
+ BorderType3x3::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0), make_tuple(1, 1), make_tuple(0, 2), make_tuple(2, 0), make_tuple(2, 2)),
- testing::ValuesIn(BorderType3x3ROI::all())
+ BorderType3x3ROI::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0), make_tuple(1, 1), make_tuple(0, 2), make_tuple(2, 0)),
- testing::ValuesIn(BorderType::all())
+ BorderType::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0), make_tuple(1, 1), make_tuple(0, 2), make_tuple(2, 0)),
- testing::ValuesIn(BorderTypeROI::all())
+ BorderTypeROI::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0)),
- testing::ValuesIn(BorderType3x3::all())
+ BorderType3x3::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0)),
- testing::ValuesIn(BorderType3x3ROI::all())
+ BorderType3x3ROI::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0)),
- testing::ValuesIn(BorderType3x3::all())
+ BorderType3x3::all()
)
)
{
testing::Values(FILTER_SRC_SIZES),
testing::Values(CV_16S, CV_32F),
testing::Values(make_tuple(0, 1), make_tuple(1, 0)),
- testing::ValuesIn(BorderType3x3ROI::all())
+ BorderType3x3ROI::all()
)
)
{
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
testing::Values(CV_8UC1, CV_16SC1),
- testing::ValuesIn(ThreshType::all())
+ ThreshType::all()
)
)
{
PERF_TEST_P(Size_AdaptThreshType_AdaptThreshMethod_BlockSize, adaptiveThreshold,
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
- testing::ValuesIn(AdaptThreshType::all()),
- testing::ValuesIn(AdaptThreshMethod::all()),
+ AdaptThreshType::all(),
+ AdaptThreshMethod::all(),
testing::Values(3, 5)
)
)
PERF_TEST_P( TestWarpAffine, WarpAffine,
Combine(
Values( szVGA, sz720p, sz1080p ),
- ValuesIn( InterType::all() ),
- ValuesIn( BorderMode::all() )
+ InterType::all(),
+ BorderMode::all()
)
)
{
PERF_TEST_P( TestWarpPerspective, WarpPerspective,
Combine(
Values( szVGA, sz720p, sz1080p ),
- ValuesIn( InterType::all() ),
- ValuesIn( BorderMode::all() )
+ InterType::all(),
+ BorderMode::all()
)
)
{
PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
Combine(
Values( Size(640,480), Size(1920,1080), Size(2592,1944) ),
- ValuesIn( InterType::all() ),
- ValuesIn( BorderMode::all() ),
+ InterType::all(),
+ BorderMode::all(),
Values( CV_8UC1, CV_8UC4 )
)
)
Combine(
Values( TYPICAL_MAT_TYPES ),
Values( szVGA, sz720p, sz1080p ),
- ValuesIn( InterType::all() ),
- ValuesIn( BorderMode::all() ),
- ValuesIn( RemapMode::all() )
+ InterType::all(),
+ BorderMode::all(),
+ RemapMode::all()
)
)
{
<?xml version="1.0" encoding="UTF-8"?>
<lint>
+ <issue id="InlinedApi">
+ <ignore path="src\org\opencv\android\JavaCameraView.java" />
+ </issue>
<issue id="NewApi">
<ignore path="src\org\opencv\android\JavaCameraView.java" />
</issue>
-#/usr/bin/env python
+#!/usr/bin/env python
import sys, os, re
-#/usr/bin/env python
+#!/usr/bin/env python
import sys, re, os.path
from string import Template
-#/usr/bin/env python
+#!/usr/bin/env python
import os, sys, re, string, glob
from optparse import OptionParser
-#/usr/bin/env python
+#!/usr/bin/env python
import os, sys, re, string, fnmatch
allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "gpu", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "ocl", "superres"]
getHolder().addCallback(this);
mMaxWidth = MAX_UNSPECIFIED;
mMaxHeight = MAX_UNSPECIFIED;
+ styledAttrs.recycle();
}
public interface CvCameraViewListener {
mPreviewFormat = format;
}
- private CvCameraViewListenerAdapter() {}
-
private int mPreviewFormat = Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA;
private CvCameraViewListener mOldStyleListener;
};
mRgba.release();
}
- private JavaCameraFrame(CvCameraViewFrame obj) {
- }
-
private Mat mYuvFrameData;
private Mat mRgba;
private int mWidth;
}
}
- private class NativeCameraFrame implements CvCameraViewFrame {
+ private static class NativeCameraFrame implements CvCameraViewFrame {
@Override
public Mat rgba() {
- mCamera.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
+ mCapture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
return mRgba;
}
@Override
public Mat gray() {
- mCamera.retrieve(mGray, Highgui.CV_CAP_ANDROID_GREY_FRAME);
+ mCapture.retrieve(mGray, Highgui.CV_CAP_ANDROID_GREY_FRAME);
return mGray;
}
private class CameraWorker implements Runnable {
- private Mat mRgba = new Mat();
- private Mat mGray = new Mat();
-
public void run() {
do {
if (!mCamera.grab()) {
protected:
+ void update_weights_impl( CvBoostTree* tree, double initial_weights[2] );
+
virtual bool set_params( const CvBoostParams& params );
virtual void update_weights( CvBoostTree* tree );
virtual void trim_weights();
}
void
-CvBoost::update_weights( CvBoostTree* tree )
+CvBoost::update_weights_impl( CvBoostTree* tree, double initial_weights[2] )
{
- CV_FUNCNAME( "CvBoost::update_weights" );
+ CV_FUNCNAME( "CvBoost::update_weights_impl" );
__BEGIN__;
// so we need to convert class labels to floating-point values
double w0 = 1./n;
- double p[2] = { 1, 1 };
+ double p[2] = { initial_weights[0], initial_weights[1] };
cvReleaseMat( &orig_response );
cvReleaseMat( &sum_response );
__END__;
}
+void
+CvBoost::update_weights( CvBoostTree* tree ) {
+ double initial_weights[2] = { 1, 1 };
+ update_weights_impl( tree, initial_weights );
+}
static CV_IMPLEMENT_QSORT_EX( icvSort_64f, double, CV_LT, int )
endif()
set(the_description "Functionality with possible limitations on the use")
-ocv_add_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_gpu opencv_ocl)
-ocv_module_include_directories()
-
-if(HAVE_CUDA AND HAVE_opencv_gpu)
- ocv_source_group("Src\\Cuda" GLOB "src/cuda/*.cu")
- ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include" ${CUDA_INCLUDE_DIRS})
- ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
-
- file(GLOB lib_cuda "src/cuda/*.cu")
- ocv_cuda_compile(cuda_objs ${lib_cuda})
-
- set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
-else()
- set(lib_cuda "")
- set(cuda_objs "")
- set(cuda_link_libs "")
-endif()
-
-ocv_glob_module_sources(SOURCES ${lib_cuda} ${cuda_objs})
-
-ocv_create_module(${cuda_link_libs})
-ocv_add_precompiled_headers(${the_module})
-
-ocv_add_accuracy_tests()
-ocv_add_perf_tests()
+ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
+ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_gpu opencv_ocl)
//
//M*/
-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPU
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/limits.hpp"
}}} // namespace cv { namespace gpu { namespace device
-#endif /* CUDA_DISABLER */
+#endif /* HAVE_OPENCV_GPU */
//
//M*/
-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPU
#include "opencv2/gpu/device/common.hpp"
}
}}}
-
-#endif /* CUDA_DISABLER */
+#endif /* HAVE_OPENCV_GPU */
size_t wave_size = 0;
queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
- std::sprintf(pSURF_OPTIONS, " -D WAVE_SIZE=%d", static_cast<int>(wave_size));
+ std::sprintf(pSURF_OPTIONS, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
OPTION_INIT = true;
}
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS);
// currently only support wavefront size queries
enum DEVICE_INFO
{
- WAVEFRONT_SIZE, //in AMD speak
- WARP_SIZE = WAVEFRONT_SIZE //in nvidia speak
+ WAVEFRONT_SIZE, //in AMD speak
+ WARP_SIZE = WAVEFRONT_SIZE, //in nvidia speak
+ IS_CPU_DEVICE //check if the device is CPU
};
//info should have been pre-allocated
void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info);
namespace ocl
{
////////////////////////////////OpenCL kernel strings/////////////////////
- extern const char *bitwise;
- extern const char *bitwiseM;
extern const char *transpose_kernel;
extern const char *arithm_nonzero;
extern const char *arithm_sum;
extern const char *arithm_add;
extern const char *arithm_add_scalar;
extern const char *arithm_add_scalar_mask;
+ extern const char *arithm_bitwise_binary;
+ extern const char *arithm_bitwise_binary_mask;
+ extern const char *arithm_bitwise_binary_scalar;
+ extern const char *arithm_bitwise_binary_scalar_mask;
extern const char *arithm_bitwise_not;
- extern const char *arithm_bitwise_and;
- extern const char *arithm_bitwise_and_mask;
- extern const char *arithm_bitwise_and_scalar;
- extern const char *arithm_bitwise_and_scalar_mask;
- extern const char *arithm_bitwise_or;
- extern const char *arithm_bitwise_or_mask;
- extern const char *arithm_bitwise_or_scalar;
- extern const char *arithm_bitwise_or_scalar_mask;
- extern const char *arithm_bitwise_xor;
- extern const char *arithm_bitwise_xor_mask;
- extern const char *arithm_bitwise_xor_scalar;
- extern const char *arithm_bitwise_xor_scalar_mask;
extern const char *arithm_compare_eq;
extern const char *arithm_compare_ne;
- extern const char *arithm_sub;
- extern const char *arithm_sub_scalar;
- extern const char *arithm_sub_scalar_mask;
extern const char *arithm_mul;
extern const char *arithm_div;
extern const char *arithm_absdiff;
/////////////////////// add subtract multiply divide /////////////////////////
//////////////////////////////////////////////////////////////////////////////
template<typename T>
-void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
+void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ string kernelName, const char **kernelString, void *_scalar, int op_type = 0)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
scalar = (T)scalar1;
args.push_back( make_pair( sizeof(T), (void *)&scalar ));
}
-
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+ switch(op_type)
+ {
+ case MAT_ADD:
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth, "-D ARITHM_ADD");
+ break;
+ case MAT_SUB:
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth, "-D ARITHM_SUB");
+ break;
+ default:
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+ }
}
-static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
+static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ string kernelName, const char **kernelString, int op_type = 0)
{
- arithmetic_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL);
+ arithmetic_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL, op_type);
}
-static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString)
+static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask,
+ string kernelName, const char **kernelString, int op_type = 0)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+ switch (op_type)
+ {
+ case MAT_ADD:
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, "-D ARITHM_ADD");
+ break;
+ case MAT_SUB:
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, "-D ARITHM_SUB");
+ break;
+ default:
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+ }
}
void cv::ocl::add(const oclMat &src1, const oclMat &src2, oclMat &dst)
{
- arithmetic_run(src1, src2, dst, "arithm_add", &arithm_add);
+ arithmetic_run(src1, src2, dst, "arithm_add", &arithm_add, MAT_ADD);
}
void cv::ocl::add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
- arithmetic_run(src1, src2, dst, mask, "arithm_add_with_mask", &arithm_add);
+ arithmetic_run(src1, src2, dst, mask, "arithm_add_with_mask", &arithm_add, MAT_ADD);
}
void cv::ocl::subtract(const oclMat &src1, const oclMat &src2, oclMat &dst)
{
- arithmetic_run(src1, src2, dst, "arithm_sub", &arithm_sub);
+ arithmetic_run(src1, src2, dst, "arithm_add", &arithm_add, MAT_SUB);
}
void cv::ocl::subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
- arithmetic_run(src1, src2, dst, mask, "arithm_sub_with_mask", &arithm_sub);
+ arithmetic_run(src1, src2, dst, mask, "arithm_add_with_mask", &arithm_add, MAT_SUB);
}
typedef void (*MulDivFunc)(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName,
const char **kernelString, void *scalar);
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step1 ));
if(isMatSubScalar != 0)
- {
- isMatSubScalar = isMatSubScalar > 0 ? 1 : 0;
- args.push_back( make_pair( sizeof(cl_int) , (void *)&isMatSubScalar));
- }
-
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, "-D ARITHM_SUB");
+ else
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, "-D ARITHM_ADD");
}
static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar)
void cv::ocl::subtract(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- string kernelName = mask.data ? "arithm_s_sub_with_mask" : "arithm_s_sub";
- const char **kernelString = mask.data ? &arithm_sub_scalar_mask : &arithm_sub_scalar;
+ string kernelName = mask.data ? "arithm_s_add_with_mask" : "arithm_s_add";
+ const char **kernelString = mask.data ? &arithm_add_scalar_mask : &arithm_add_scalar;
arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, 1);
}
void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, const oclMat &mask)
{
- string kernelName = mask.data ? "arithm_s_sub_with_mask" : "arithm_s_sub";
- const char **kernelString = mask.data ? &arithm_sub_scalar_mask : &arithm_sub_scalar;
+ string kernelName = mask.data ? "arithm_s_add_with_mask" : "arithm_s_add";
+ const char **kernelString = mask.data ? &arithm_add_scalar_mask : &arithm_add_scalar;
arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, -1);
}
void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst)
template<typename T>
-void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
+void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName,
+ const char **kernelString, void *_scalar, const char* _opt = NULL)
{
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
args.push_back( make_pair( sizeof(T), (void *)&scalar ));
}
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth, _opt);
}
-static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
+static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ string kernelName, const char **kernelString, const char* _opt = NULL)
{
- bitwise_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL);
+ bitwise_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL, _opt);
}
-static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString)
+static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ const oclMat &mask, string kernelName, const char **kernelString, const char* _opt = NULL)
{
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, _opt);
}
template <typename WT , typename CL_WT>
-void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar)
+void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst,
+ const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt = NULL)
{
dst.create(src1.size(), src1.type());
args.push_back( make_pair( sizeof(cl_int) , (void *)&isMatSubScalar));
}
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, opt);
}
-typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar);
+typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst,
+ const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt);
-static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar)
+static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
+ const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt)
{
static BitwiseFuncS tab[8] =
{
BitwiseFuncS func = tab[src1.depth()];
if(func == 0)
cv::ocl::error("Unsupported arithmetic operation", __FILE__, __LINE__);
- func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar);
+ func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar, opt);
}
-static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString)
+static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
+ const oclMat &mask, string kernelName, const char **kernelString, const char * opt = NULL)
{
- bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0);
+ bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0, opt);
}
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
cout << "Selected device do not support double" << endl;
return;
}
- oclMat emptyMat;
- string kernelName = mask.empty() ? "arithm_bitwise_or" : "arithm_bitwise_or_with_mask";
+
+ string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
+ static const char opt [] = "-D OP_BINARY=|";
if (mask.empty())
- bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_or);
+ bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
- bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_or_mask);
+ bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
}
cout << "Selected device do not support double" << endl;
return;
}
- string kernelName = mask.data ? "arithm_s_bitwise_or_with_mask" : "arithm_s_bitwise_or";
+ static const char opt [] = "-D OP_BINARY=|";
+ string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
if (mask.data)
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_or_scalar_mask);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_or_scalar);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
}
void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
}
oclMat emptyMat;
- string kernelName = mask.empty() ? "arithm_bitwise_and" : "arithm_bitwise_and_with_mask";
+ string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
+ static const char opt [] = "-D OP_BINARY=&";
if (mask.empty())
- bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_and);
+ bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
- bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_mask);
+ bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
}
void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
cout << "Selected device do not support double" << endl;
return;
}
- string kernelName = mask.data ? "arithm_s_bitwise_and_with_mask" : "arithm_s_bitwise_and";
+ static const char opt [] = "-D OP_BINARY=&";
+ string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
if (mask.data)
- bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_scalar_mask);
+ bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
- bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_scalar);
+ bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
}
void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
cout << "Selected device do not support double" << endl;
return;
}
- oclMat emptyMat;
- string kernelName = mask.empty() ? "arithm_bitwise_xor" : "arithm_bitwise_xor_with_mask";
+ string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
+ static const char opt [] = "-D OP_BINARY=^";
if (mask.empty())
- bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_xor);
+ bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
- bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_mask);
+ bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
}
cout << "Selected device do not support double" << endl;
return;
}
- string kernelName = mask.data ? "arithm_s_bitwise_xor_with_mask" : "arithm_s_bitwise_xor";
+ string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
+ static const char opt [] = "-D OP_BINARY=^";
if (mask.data)
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar_mask);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
}
oclMat cv::ocl::operator ~ (const oclMat &src)
//
// @Authors
// Nathan, liujun@multicorewareinc.com
+// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
}
}
+static const int OPT_SIZE = 100;
+
+static const char * T_ARR [] = {
+ "uchar",
+ "char",
+ "ushort",
+ "short",
+ "int",
+ "float -D T_FLOAT",
+ "double"};
+
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
- assert(query.type() == CV_32F);
cv::ocl::Context *ctx = query.clCxt;
size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
+ T_ARR[query.depth()], distType, block_size, m_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_UnrollMatch";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
- assert(query.type() == CV_32F);
cv::ocl::Context *ctx = query.clCxt;
size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
+ T_ARR[query.depth()], distType, block_size);
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_Match";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
- assert(query.type() == CV_32F);
cv::ocl::Context *ctx = query.clCxt;
size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
+ T_ARR[query.depth()], distType, block_size, m_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_RadiusUnrollMatch";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &/*mask*/,
const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
- assert(query.type() == CV_32F);
cv::ocl::Context *ctx = query.clCxt;
size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
+ T_ARR[query.depth()], distType, block_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_RadiusMatch";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
+ T_ARR[query.depth()], distType, block_size, m_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_knnUnrollMatch";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
+ T_ARR[query.depth()], distType, block_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_knnMatch";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
int m_size = MAX_DESC_LEN;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
+ T_ARR[query.depth()], distType, block_size, m_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_calcDistanceUnrolled";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
int block_size = BLOCK_SIZE;
vector< pair<size_t, const void *> > args;
+ char opt [OPT_SIZE] = "";
+ sprintf(opt,
+ "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
+ T_ARR[query.depth()], distType, block_size);
+
if(globalSize[0] != 0)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
std::string kernelName = "BruteForceMatch_calcDistance";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, query.depth());
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
}
}
//args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, trainIdx.depth(), -1);
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
}
}
if (query.empty() || train.empty())
return;
- // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
- int callType = query.depth();
- char cvFuncName[] = "singleMatch";
- if (callType != 5)
- CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");
-
- if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
- || callType != 2 || callType != 4)))
- {
- CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
- }
-
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(train.cols == query.cols && train.type() == query.type());
- trainIdx.create(1, query.rows, CV_32S);
- distance.create(1, query.rows, CV_32F);
+ ensureSizeIsEnough(1, query.rows, CV_32S, trainIdx);
+ ensureSizeIsEnough(1, query.rows, CV_32F, distance);
matchDispatcher(query, train, mask, trainIdx, distance, distType);
-exit:
+
return;
}
void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, vector<DMatch> &matches, const oclMat &mask)
{
- assert(mask.empty()); // mask is not supported at the moment
+ assert(mask.empty()); // mask is not supported at the moment
oclMat trainIdx, distance;
matchSingle(query, train, trainIdx, distance, mask);
matchDownload(trainIdx, distance, matches);
if (query.empty() || trainCollection.empty())
return;
- // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
- int callType = query.depth();
- char cvFuncName[] = "matchCollection";
- if (callType != 5)
- CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");
-
- if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
- || callType != 2 || callType != 4)))
- {
- CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
- }
-
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
+
+ const int nQuery = query.rows;
- trainIdx.create(1, query.rows, CV_32S);
- imgIdx.create(1, query.rows, CV_32S);
- distance.create(1, query.rows, CV_32F);
+ ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
+ ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
+ ensureSizeIsEnough(1, nQuery, CV_32F, distance);
matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
-exit:
+
return;
}
if (query.empty() || train.empty())
return;
- // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
- int callType = query.depth();
-
- char cvFuncName[] = "knnMatchSingle";
- if (callType != 5)
- CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");
-
- if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
- || callType != 2 || callType != 4)))
- {
- CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
- }
-
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(train.type() == query.type() && train.cols == query.cols);
+ const int nQuery = query.rows;
+ const int nTrain = train.rows;
+
if (k == 2)
{
- trainIdx.create(1, query.rows, CV_32SC2);
- distance.create(1, query.rows, CV_32FC2);
+ ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
+ ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
}
else
{
- trainIdx.create(query.rows, k, CV_32S);
- distance.create(query.rows, k, CV_32F);
- allDist.create(query.rows, train.rows, CV_32FC1);
+ ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
+ ensureSizeIsEnough(nQuery, k, CV_32F, distance);
+ ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
}
trainIdx.setTo(Scalar::all(-1));
kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType);
-exit:
+
return;
}
typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
-#if 0
- static const caller_t callers[3][6] =
- {
- {
- ocl_match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
- ocl_match2L1_gpu<unsigned short>, ocl_match2L1_gpu<short>,
- ocl_match2L1_gpu<int>, ocl_match2L1_gpu<float>
- },
- {
- 0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
- 0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
- 0/*match2L2_gpu<int>*/, ocl_match2L2_gpu<float>
- },
- {
- ocl_match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
- ocl_match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
- ocl_match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
- }
- };
-#endif
+
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
const int nQuery = query.rows;
- trainIdx.create(1, nQuery, CV_32SC2);
- imgIdx.create(1, nQuery, CV_32SC2);
- distance.create(1, nQuery, CV_32SC2);
+ ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
+ ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx);
+ ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
trainIdx.setTo(Scalar::all(-1));
// radiusMatchSingle
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
- oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
+ oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
{
if (query.empty() || train.empty())
return;
- // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
- int callType = query.depth();
- char cvFuncName[] = "radiusMatchSingle";
- if (callType != 5)
- CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");
-
- if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
- || callType != 2 || callType != 4)))
- {
- CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
- }
+ const int nQuery = query.rows;
+ const int nTrain = train.rows;
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(train.type() == query.type() && train.cols == query.cols);
CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size()));
- nMatches.create(1, query.rows, CV_32SC1);
+ ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
if (trainIdx.empty())
{
- trainIdx.create(query.rows, std::max((train.rows/ 100), 10), CV_32SC1);
- distance.create(query.rows, std::max((train.rows/ 100), 10), CV_32FC1);
+ ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx);
+ ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
}
nMatches.setTo(Scalar::all(0));
matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
-exit:
+
return;
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
- rectKernel?"-D RECTKERNEL":"",
- s);
+ s, rectKernel?"-D RECTKERNEL":"");
vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
//M*/
#include "precomp.hpp"
-
using namespace cv;
using namespace cv::ocl;
using namespace std;
}
}
-
void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
{
computeGradient(img, grad, qangle);
cdescr_size = descr_size;
}
+static inline int divUp(int total, int grain)
+{
+ return (total + grain - 1) / grain;
+}
+
+static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args)
+{
+ size_t wave_size = 0;
+ queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
+ if (wave_size <= 16)
+ {
+ char build_options[64];
+ sprintf(build_options, (wave_size == 16) ? "-D WAVE_SIZE_16" : "-D WAVE_SIZE_1");
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ }
+ else
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1);
+}
+
void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists)
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
- size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 };
- size_t localThreads[3] = { 32, 2, 1 };
+ int blocks_total = img_block_width * img_block_height;
+ int blocks_in_group = 4;
+ size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
+ size_t globalThreads[3] = { divUp(blocks_total, blocks_in_group) * localThreads[0], 2, 1 };
int grad_quadstep = grad.step >> 2;
int qangle_step = qangle.step;
int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float);
int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float);
- int smem = hists_size + final_hists_size;
+ int smem = (hists_size + final_hists_size) * blocks_in_group;
- args.push_back( make_pair( sizeof(cl_int), (void *)&width));
args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_x));
args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_y));
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&blocks_in_group));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&blocks_total));
args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep));
args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step));
args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( make_pair( smem, (void *)NULL));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args);
}
void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y,
args.push_back( make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args);
}
void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y,
args.push_back( make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args);
}
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
-}
-
-static inline int divUp(int total, int grain)
-{
- return (total + grain - 1) / grain;
+ openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
float ifx = (float)src.cols / sz.width;
float ify = (float)src.rows / sz.height;
+ int src_step = static_cast<int>(src.step);
+ int dst_step = static_cast<int>(dst.step);
vector< pair<size_t, const void *> > args;
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width));
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
- openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
warpPerspective_gpu(src, dst, coeffs, interpolation);
}
-
////////////////////////////////////////////////////////////////////////
// integral
-
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
{
CV_Assert(src.type() == CV_8UC1);
int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
oclMat t_sum , t_sqsum;
- t_sum.create(src.cols, src.rows, CV_32SC1);
- t_sqsum.create(src.cols, src.rows, CV_32FC1);
-
int w = src.cols + 1, h = src.rows + 1;
- sum.create(h, w, CV_32SC1);
- sqsum.create(h, w, CV_32FC1);
- int sum_offset = sum.offset / vlen, sqsum_offset = sqsum.offset / vlen;
-
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, -1);
- args.clear();
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
- size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, -1);
- //cout << "tested" << endl;
+ int depth;
+ if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255
+ {
+ t_sum.create(src.cols, src.rows, CV_32SC1);
+ sum.create(h, w, CV_32SC1);
+ }
+ else
+ {
+ //Use float to prevent overflow
+ t_sum.create(src.cols, src.rows, CV_32FC1);
+ sum.create(h, w, CV_32FC1);
+ }
+ t_sqsum.create(src.cols, src.rows, CV_32FC1);
+ sqsum.create(h, w, CV_32FC1);
+ depth = sum.depth();
+ int sum_offset = sum.offset / vlen;
+ int sqsum_offset = sqsum.offset / vlen;
+
+ vector<pair<size_t , const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
+ args.clear();
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
+ size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
}
+
void integral(const oclMat &src, oclMat &sum)
{
CV_Assert(src.type() == CV_8UC1);
int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
oclMat t_sum;
- t_sum.create(src.cols, src.rows, CV_32SC1);
-
int w = src.cols + 1, h = src.rows + 1;
- sum.create(h, w, CV_32SC1);
- int sum_offset = sum.offset / vlen;
-
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, -1);
- args.clear();
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, -1);
- //cout << "tested" << endl;
+ int depth;
+ if(src.cols * src.rows <= 2901 * 2901)
+ {
+ t_sum.create(src.cols, src.rows, CV_32SC1);
+ sum.create(h, w, CV_32SC1);
+ }else
+ {
+ t_sum.create(src.cols, src.rows, CV_32FC1);
+ sum.create(h, w, CV_32FC1);
+ }
+ depth = sum.depth();
+ int sum_offset = sum.offset / vlen;
+ vector<pair<size_t , const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
+ args.clear();
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
+ openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
}
/////////////////////// corner //////////////////////////////
{
case WAVEFRONT_SIZE:
{
+ bool is_cpu = false;
+ queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
+ if(is_cpu)
+ {
+ *(int*)info = 1;
+ return;
+ }
#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
try
{
}
break;
+ case IS_CPU_DEVICE:
+ {
+ cl_device_type devicetype;
+ openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
+ CL_DEVICE_TYPE, sizeof(cl_device_type),
+ &devicetype, NULL));
+ *(bool*)info = (devicetype == CVCL_DEVICE_TYPE_CPU);
+ }
+ break;
default:
CV_Error(-1, "Invalid device info type");
break;
if( rtype < 0 )
rtype = type();
else
- rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
+ rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels());
//int scn = channels();
int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
#endif
#endif
+#ifdef ARITHM_ADD
+ #define ARITHM_OP(A,B) ((A)+(B))
+#elif defined ARITHM_SUB
+ #define ARITHM_OP(A,B) ((A)-(B))
+#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////ADD////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
+ short4 tmp = ARITHM_OP(convert_short4_sat(src1_data), convert_short4_sat(src2_data));
uchar4 tmp_data = convert_uchar4_sat(tmp);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) + convert_int4_sat(src2_data);
+ int4 tmp = ARITHM_OP(convert_int4_sat(src1_data), convert_int4_sat(src2_data));
ushort4 tmp_data = convert_ushort4_sat(tmp);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) + convert_int4_sat(src2_data);
+ int4 tmp = ARITHM_OP(convert_int4_sat(src1_data), convert_int4_sat(src2_data));
short4 tmp_data = convert_short4_sat(tmp);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
int data1 = *((__global int *)((__global char *)src1 + src1_index));
int data2 = *((__global int *)((__global char *)src2 + src2_index));
- long tmp = (long)(data1) + (long)(data2);
+ long tmp = ARITHM_OP((long)(data1), (long)(data2));
*((__global int *)((__global char *)dst + dst_index)) = convert_int_sat(tmp);
}
float data1 = *((__global float *)((__global char *)src1 + src1_index));
float data2 = *((__global float *)((__global char *)src2 + src2_index));
- float tmp = data1 + data2;
+ float tmp = ARITHM_OP(data1, data2);
*((__global float *)((__global char *)dst + dst_index)) = tmp;
}
double data1 = *((__global double *)((__global char *)src1 + src1_index));
double data2 = *((__global double *)((__global char *)src2 + src2_index));
- *((__global double *)((__global char *)dst + dst_index)) = data1 + data2;
+ *((__global double *)((__global char *)dst + dst_index)) = ARITHM_OP(data1, data2);
}
}
#endif
}
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
+ short4 tmp = ARITHM_OP(convert_short4_sat(src1_data), convert_short4_sat(src2_data));
uchar4 tmp_data = convert_uchar4_sat(tmp);
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
uchar2 mask_data = vload2(0, mask + mask_index);
ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) + convert_int2_sat(src2_data);
+ int2 tmp = ARITHM_OP(convert_int2_sat(src1_data), convert_int2_sat(src2_data));
ushort2 tmp_data = convert_ushort2_sat(tmp);
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
uchar2 mask_data = vload2(0, mask + mask_index);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) + convert_int2_sat(src2_data);
+ int2 tmp = ARITHM_OP(convert_int2_sat(src1_data), convert_int2_sat(src2_data));
short2 tmp_data = convert_short2_sat(tmp);
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
int src_data2 = *((__global int *)((__global char *)src2 + src2_index));
int dst_data = *((__global int *)((__global char *)dst + dst_index));
- int data = convert_int_sat((long)src_data1 + (long)src_data2);
+ int data = convert_int_sat(ARITHM_OP((long)src_data1, (long)src_data2));
data = mask_data ? data : dst_data;
*((__global int *)((__global char *)dst + dst_index)) = data;
float src_data2 = *((__global float *)((__global char *)src2 + src2_index));
float dst_data = *((__global float *)((__global char *)dst + dst_index));
- float data = src_data1 + src_data2;
+ float data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global float *)((__global char *)dst + dst_index)) = data;
double src_data2 = *((__global double *)((__global char *)src2 + src2_index));
double dst_data = *((__global double *)((__global char *)dst + dst_index));
- double data = src_data1 + src_data2;
+ double data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global double *)((__global char *)dst + dst_index)) = data;
uchar2 mask_data = vload2(0, mask + mask_index);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
+ short4 tmp = ARITHM_OP(convert_short4_sat(src1_data), convert_short4_sat(src2_data));
uchar4 tmp_data = convert_uchar4_sat(tmp);
data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
ushort2 src_data2 = *((__global ushort2 *)((__global char *)src2 + src2_index));
ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
- int2 tmp = convert_int2_sat(src_data1) + convert_int2_sat(src_data2);
+ int2 tmp = ARITHM_OP(convert_int2_sat(src_data1), convert_int2_sat(src_data2));
ushort2 data = convert_ushort2_sat(tmp);
data = mask_data ? data : dst_data;
short2 src_data2 = *((__global short2 *)((__global char *)src2 + src2_index));
short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
- int2 tmp = convert_int2_sat(src_data1) + convert_int2_sat(src_data2);
+ int2 tmp = ARITHM_OP(convert_int2_sat(src_data1), convert_int2_sat(src_data2));
short2 data = convert_short2_sat(tmp);
data = mask_data ? data : dst_data;
int2 src_data2 = *((__global int2 *)((__global char *)src2 + src2_index));
int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
- int2 data = convert_int2_sat(convert_long2_sat(src_data1) + convert_long2_sat(src_data2));
+ int2 data = convert_int2_sat(ARITHM_OP(convert_long2_sat(src_data1), convert_long2_sat(src_data2)));
data = mask_data ? data : dst_data;
*((__global int2 *)((__global char *)dst + dst_index)) = data;
float2 src_data2 = *((__global float2 *)((__global char *)src2 + src2_index));
float2 dst_data = *((__global float2 *)((__global char *)dst + dst_index));
- float2 data = src_data1 + src_data2;
+ float2 data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global float2 *)((__global char *)dst + dst_index)) = data;
double2 src_data2 = *((__global double2 *)((__global char *)src2 + src2_index));
double2 dst_data = *((__global double2 *)((__global char *)dst + dst_index));
- double2 data = src_data1 + src_data2;
+ double2 data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global double2 *)((__global char *)dst + dst_index)) = data;
}
}
#endif
-__kernel void arithm_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- uchar4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- uchar4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = convert_uchar4_sat(convert_short4_sat(src1_data_0) + convert_short4_sat(src2_data_0));
- uchar4 tmp_data_1 = convert_uchar4_sat(convert_short4_sat(src1_data_1) + convert_short4_sat(src2_data_1));
- uchar4 tmp_data_2 = convert_uchar4_sat(convert_short4_sat(src1_data_2) + convert_short4_sat(src2_data_2));
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-__kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 0));
- ushort2 src2_data_1 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 4));
- ushort2 src2_data_2 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = convert_ushort2_sat(convert_int2_sat(src1_data_0) + convert_int2_sat(src2_data_0));
- ushort2 tmp_data_1 = convert_ushort2_sat(convert_int2_sat(src1_data_1) + convert_int2_sat(src2_data_1));
- ushort2 tmp_data_2 = convert_ushort2_sat(convert_int2_sat(src1_data_2) + convert_int2_sat(src2_data_2));
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 0));
- short2 src2_data_1 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 4));
- short2 src2_data_2 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = convert_short2_sat(convert_int2_sat(src1_data_0) + convert_int2_sat(src2_data_0));
- short2 tmp_data_1 = convert_short2_sat(convert_int2_sat(src1_data_1) + convert_int2_sat(src2_data_1));
- short2 tmp_data_2 = convert_short2_sat(convert_int2_sat(src1_data_2) + convert_int2_sat(src2_data_2));
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = *((__global int *)((__global char *)src2 + src2_index + 0));
- int src2_data_1 = *((__global int *)((__global char *)src2 + src2_index + 4));
- int src2_data_2 = *((__global int *)((__global char *)src2 + src2_index + 8));
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = convert_int_sat((long)src1_data_0 + (long)src2_data_0);
- int tmp_data_1 = convert_int_sat((long)src1_data_1 + (long)src2_data_1);
- int tmp_data_2 = convert_int_sat((long)src1_data_2 + (long)src2_data_2);
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- float src1_data_0 = *((__global float *)((__global char *)src1 + src1_index + 0));
- float src1_data_1 = *((__global float *)((__global char *)src1 + src1_index + 4));
- float src1_data_2 = *((__global float *)((__global char *)src1 + src1_index + 8));
-
- float src2_data_0 = *((__global float *)((__global char *)src2 + src2_index + 0));
- float src2_data_1 = *((__global float *)((__global char *)src2 + src2_index + 4));
- float src2_data_2 = *((__global float *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- float data_0 = *((__global float *)((__global char *)dst + dst_index + 0));
- float data_1 = *((__global float *)((__global char *)dst + dst_index + 4));
- float data_2 = *((__global float *)((__global char *)dst + dst_index + 8));
-
- float tmp_data_0 = src1_data_0 + src2_data_0;
- float tmp_data_1 = src1_data_1 + src2_data_1;
- float tmp_data_2 = src1_data_2 + src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_add_with_mask_C3_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global double *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 24) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- double src1_data_0 = *((__global double *)((__global char *)src1 + src1_index + 0 ));
- double src1_data_1 = *((__global double *)((__global char *)src1 + src1_index + 8 ));
- double src1_data_2 = *((__global double *)((__global char *)src1 + src1_index + 16));
-
- double src2_data_0 = *((__global double *)((__global char *)src2 + src2_index + 0 ));
- double src2_data_1 = *((__global double *)((__global char *)src2 + src2_index + 8 ));
- double src2_data_2 = *((__global double *)((__global char *)src2 + src2_index + 16));
-
- uchar mask_data = * (mask + mask_index);
-
- double data_0 = *((__global double *)((__global char *)dst + dst_index + 0 ));
- double data_1 = *((__global double *)((__global char *)dst + dst_index + 8 ));
- double data_2 = *((__global double *)((__global char *)dst + dst_index + 16));
-
- double tmp_data_0 = src1_data_0 + src2_data_0;
- double tmp_data_1 = src1_data_1 + src2_data_1;
- double tmp_data_2 = src1_data_2 + src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
__kernel void arithm_add_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src_data2 = *((__global uchar4 *)(src2 + src2_index));
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- uchar4 data = convert_uchar4_sat(convert_ushort4_sat(src_data1) + convert_ushort4_sat(src_data2));
+ uchar4 data = convert_uchar4_sat(ARITHM_OP(convert_short4_sat(src_data1), convert_short4_sat(src_data2)));
data = mask_data ? data : dst_data;
*((__global uchar4 *)(dst + dst_index)) = data;
ushort4 src_data2 = *((__global ushort4 *)((__global char *)src2 + src2_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- ushort4 data = convert_ushort4_sat(convert_int4_sat(src_data1) + convert_int4_sat(src_data2));
+ ushort4 data = convert_ushort4_sat(ARITHM_OP(convert_int4_sat(src_data1), convert_int4_sat(src_data2)));
data = mask_data ? data : dst_data;
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
short4 src_data2 = *((__global short4 *)((__global char *)src2 + src2_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- short4 data = convert_short4_sat(convert_int4_sat(src_data1) + convert_int4_sat(src_data2));
+ short4 data = convert_short4_sat(ARITHM_OP(convert_int4_sat(src_data1), convert_int4_sat(src_data2)));
data = mask_data ? data : dst_data;
*((__global short4 *)((__global char *)dst + dst_index)) = data;
int4 src_data2 = *((__global int4 *)((__global char *)src2 + src2_index));
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
- int4 data = convert_int4_sat(convert_long4_sat(src_data1) + convert_long4_sat(src_data2));
+ int4 data = convert_int4_sat(ARITHM_OP(convert_long4_sat(src_data1), convert_long4_sat(src_data2)));
data = mask_data ? data : dst_data;
*((__global int4 *)((__global char *)dst + dst_index)) = data;
float4 src_data2 = *((__global float4 *)((__global char *)src2 + src2_index));
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
- float4 data = src_data1 + src_data2;
+ float4 data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global float4 *)((__global char *)dst + dst_index)) = data;
double4 src_data2 = *((__global double4 *)((__global char *)src2 + src2_index));
double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
- double4 data = src_data1 + src_data2;
+ double4 data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global double4 *)((__global char *)dst + dst_index)) = data;
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
+#endif
+#ifdef ARITHM_ADD
+ #define ARITHM_OP(A,B) ((A)+(B))
+#elif defined ARITHM_SUB
+ #define ARITHM_OP(A,B) ((A)-(B))
#endif
/**************************************add with scalar without mask**************************************/
__kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
}
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) + src2_data;
+ int4 tmp = ARITHM_OP(convert_int4_sat(src1_data), src2_data);
uchar4 tmp_data = convert_uchar4_sat(tmp);
data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
int2 src2_data = (int2)(src2.x, src2.x);
ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) + src2_data;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src1_data), src2_data);
ushort2 tmp_data = convert_ushort2_sat(tmp);
data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
int2 src2_data = (int2)(src2.x, src2.x);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) + src2_data;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src1_data), src2_data);
short2 tmp_data = convert_short2_sat(tmp);
data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
int src_data2 = src2.x;
int dst_data = *((__global int *)((__global char *)dst + dst_index));
- int data = convert_int_sat((long)src_data1 + (long)src_data2);
+ int data = convert_int_sat(ARITHM_OP((long)src_data1, (long)src_data2));
*((__global int *)((__global char *)dst + dst_index)) = data;
}
float src_data2 = src2.x;
float dst_data = *((__global float *)((__global char *)dst + dst_index));
- float data = src_data1 + src_data2;
+ float data = ARITHM_OP(src_data1, src_data2);
*((__global float *)((__global char *)dst + dst_index)) = data;
}
double src2_data = src2.x;
double dst_data = *((__global double *)((__global char *)dst + dst_index));
- double data = src_data1 + src2_data;
+ double data = ARITHM_OP(src_data1, src2_data);
*((__global double *)((__global char *)dst + dst_index)) = data;
}
int4 src2_data = (int4)(src2.x, src2.y, src2.x, src2.y);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) + src2_data;
+ int4 tmp = ARITHM_OP(convert_int4_sat(src1_data), src2_data);
uchar4 tmp_data = convert_uchar4_sat(tmp);
data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
int2 src_data2 = (int2)(src2.x, src2.y);
ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
- int2 tmp = convert_int2_sat(src_data1) + src_data2;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src_data1), src_data2);
ushort2 data = convert_ushort2_sat(tmp);
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
int2 src_data2 = (int2)(src2.x, src2.y);
short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
- int2 tmp = convert_int2_sat(src_data1) + src_data2;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src_data1), src_data2);
short2 data = convert_short2_sat(tmp);
*((__global short2 *)((__global char *)dst + dst_index)) = data;
int2 src_data2 = (int2)(src2.x, src2.y);
int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
- int2 data = convert_int2_sat(convert_long2_sat(src_data1) + convert_long2_sat(src_data2));
+ int2 data = convert_int2_sat(ARITHM_OP(convert_long2_sat(src_data1), convert_long2_sat(src_data2)));
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
float2 src_data2 = (float2)(src2.x, src2.y);
float2 dst_data = *((__global float2 *)((__global char *)dst + dst_index));
- float2 data = src_data1 + src_data2;
+ float2 data = ARITHM_OP(src_data1, src_data2);
*((__global float2 *)((__global char *)dst + dst_index)) = data;
}
}
double2 src_data2 = (double2)(src2.x, src2.y);
double2 dst_data = *((__global double2 *)((__global char *)dst + dst_index));
- double2 data = src_data1 + src_data2;
+ double2 data = ARITHM_OP(src_data1, src_data2);
*((__global double2 *)((__global char *)dst + dst_index)) = data;
}
}
#endif
-__kernel void arithm_s_add_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- int4 src2_data_0 = (int4)(src2.x, src2.y, src2.z, src2.x);
- int4 src2_data_1 = (int4)(src2.y, src2.z, src2.x, src2.y);
- int4 src2_data_2 = (int4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = convert_uchar4_sat(convert_int4_sat(src1_data_0) + src2_data_0);
- uchar4 tmp_data_1 = convert_uchar4_sat(convert_int4_sat(src1_data_1) + src2_data_1);
- uchar4 tmp_data_2 = convert_uchar4_sat(convert_int4_sat(src1_data_2) + src2_data_2);
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-__kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = convert_ushort2_sat(convert_int2_sat(src1_data_0) + src2_data_0);
- ushort2 tmp_data_1 = convert_ushort2_sat(convert_int2_sat(src1_data_1) + src2_data_1);
- ushort2 tmp_data_2 = convert_ushort2_sat(convert_int2_sat(src1_data_2) + src2_data_2);
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = convert_short2_sat(convert_int2_sat(src1_data_0) + src2_data_0);
- short2 tmp_data_1 = convert_short2_sat(convert_int2_sat(src1_data_1) + src2_data_1);
- short2 tmp_data_2 = convert_short2_sat(convert_int2_sat(src1_data_2) + src2_data_2);
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_add_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = convert_int_sat((long)src1_data_0 + (long)src2_data_0);
- int tmp_data_1 = convert_int_sat((long)src1_data_1 + (long)src2_data_1);
- int tmp_data_2 = convert_int_sat((long)src1_data_2 + (long)src2_data_2);
-
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-__kernel void arithm_s_add_C3_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- float4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- float src1_data_0 = *((__global float *)((__global char *)src1 + src1_index + 0));
- float src1_data_1 = *((__global float *)((__global char *)src1 + src1_index + 4));
- float src1_data_2 = *((__global float *)((__global char *)src1 + src1_index + 8));
- float src2_data_0 = src2.x;
- float src2_data_1 = src2.y;
- float src2_data_2 = src2.z;
-
- float data_0 = *((__global float *)((__global char *)dst + dst_index + 0));
- float data_1 = *((__global float *)((__global char *)dst + dst_index + 4));
- float data_2 = *((__global float *)((__global char *)dst + dst_index + 8));
-
- float tmp_data_0 = src1_data_0 + src2_data_0;
- float tmp_data_1 = src1_data_1 + src2_data_1;
- float tmp_data_2 = src1_data_2 + src2_data_2;
-
- *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_add_C3_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- double4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- double src1_data_0 = *((__global double *)((__global char *)src1 + src1_index + 0 ));
- double src1_data_1 = *((__global double *)((__global char *)src1 + src1_index + 8 ));
- double src1_data_2 = *((__global double *)((__global char *)src1 + src1_index + 16));
-
- double src2_data_0 = src2.x;
- double src2_data_1 = src2.y;
- double src2_data_2 = src2.z;
-
- double data_0 = *((__global double *)((__global char *)dst + dst_index + 0 ));
- double data_1 = *((__global double *)((__global char *)dst + dst_index + 8 ));
- double data_2 = *((__global double *)((__global char *)dst + dst_index + 16));
-
- double tmp_data_0 = src1_data_0 + src2_data_0;
- double tmp_data_1 = src1_data_1 + src2_data_1;
- double tmp_data_2 = src1_data_2 + src2_data_2;
-
- *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- }
-}
-#endif
__kernel void arithm_s_add_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 data = convert_uchar4_sat(convert_int4_sat(src_data1) + src2);
+ uchar4 data = convert_uchar4_sat(ARITHM_OP(convert_int4_sat(src_data1), src2));
*((__global uchar4 *)(dst + dst_index)) = data;
}
ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 data = convert_ushort4_sat(convert_int4_sat(src_data1) + src2);
+ ushort4 data = convert_ushort4_sat(ARITHM_OP(convert_int4_sat(src_data1), src2));
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 data = convert_short4_sat(convert_int4_sat(src_data1) + src2);
+ short4 data = convert_short4_sat(ARITHM_OP(convert_int4_sat(src_data1), src2));
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 data = convert_int4_sat(convert_long4_sat(src_data1) + convert_long4_sat(src2));
+ int4 data = convert_int4_sat(ARITHM_OP(convert_long4_sat(src_data1), convert_long4_sat(src2)));
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
float4 src_data1 = *((__global float4 *)((__global char *)src1 + src1_index));
- float4 data = src_data1 + src2;
+ float4 data = ARITHM_OP(src_data1, src2);
*((__global float4 *)((__global char *)dst + dst_index)) = data;
}
double4 src_data1 = *((__global double4 *)((__global char *)src1 + src1_index));
- double4 data = src_data1 + src2;
+ double4 data = ARITHM_OP(src_data1, src2);
*((__global double4 *)((__global char *)dst + dst_index)) = data;
}
#endif
#endif
+#ifdef ARITHM_ADD
+ #define ARITHM_OP(A,B) ((A)+(B))
+#elif defined ARITHM_SUB
+ #define ARITHM_OP(A,B) ((A)-(B))
+#endif
/**************************************add with scalar with mask**************************************/
__kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
}
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) + src2_data;
+ int4 tmp = ARITHM_OP(convert_int4_sat(src1_data), src2_data);
uchar4 tmp_data = convert_uchar4_sat(tmp);
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
uchar2 mask_data = vload2(0, mask + mask_index);
ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) + src2_data;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src1_data), src2_data);
ushort2 tmp_data = convert_ushort2_sat(tmp);
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
uchar2 mask_data = vload2(0, mask + mask_index);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) + src2_data;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src1_data), src2_data);
short2 tmp_data = convert_short2_sat(tmp);
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
int src_data2 = src2.x;
int dst_data = *((__global int *)((__global char *)dst + dst_index));
- int data = convert_int_sat((long)src_data1 + (long)src_data2);
+ int data = convert_int_sat(ARITHM_OP((long)src_data1, (long)src_data2));
data = mask_data ? data : dst_data;
*((__global int *)((__global char *)dst + dst_index)) = data;
float src_data2 = src2.x;
float dst_data = *((__global float *)((__global char *)dst + dst_index));
- float data = src_data1 + src_data2;
+ float data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global float *)((__global char *)dst + dst_index)) = data;
double src_data2 = src2.x;
double dst_data = *((__global double *)((__global char *)dst + dst_index));
- double data = src_data1 + src_data2;
+ double data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global double *)((__global char *)dst + dst_index)) = data;
uchar2 mask_data = vload2(0, mask + mask_index);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) + src2_data;
+ int4 tmp = ARITHM_OP(convert_int4_sat(src1_data), src2_data);
uchar4 tmp_data = convert_uchar4_sat(tmp);
data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
int2 src_data2 = (int2)(src2.x, src2.y);
ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
- int2 tmp = convert_int2_sat(src_data1) + src_data2;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src_data1), src_data2);
ushort2 data = convert_ushort2_sat(tmp);
data = mask_data ? data : dst_data;
int2 src_data2 = (int2)(src2.x, src2.y);
short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
- int2 tmp = convert_int2_sat(src_data1) + src_data2;
+ int2 tmp = ARITHM_OP(convert_int2_sat(src_data1), src_data2);
short2 data = convert_short2_sat(tmp);
data = mask_data ? data : dst_data;
int2 src_data2 = (int2)(src2.x, src2.y);
int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
- int2 data = convert_int2_sat(convert_long2_sat(src_data1) + convert_long2_sat(src_data2));
+ int2 data = convert_int2_sat(ARITHM_OP(convert_long2_sat(src_data1), convert_long2_sat(src_data2)));
data = mask_data ? data : dst_data;
*((__global int2 *)((__global char *)dst + dst_index)) = data;
float2 src_data2 = (float2)(src2.x, src2.y);
float2 dst_data = *((__global float2 *)((__global char *)dst + dst_index));
- float2 data = src_data1 + src_data2;
+ float2 data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global float2 *)((__global char *)dst + dst_index)) = data;
double2 src_data2 = (double2)(src2.x, src2.y);
double2 dst_data = *((__global double2 *)((__global char *)dst + dst_index));
- double2 data = src_data1 + src_data2;
+ double2 data = ARITHM_OP(src_data1, src_data2);
data = mask_data ? data : dst_data;
*((__global double2 *)((__global char *)dst + dst_index)) = data;
}
#endif
-__kernel void arithm_s_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- int4 src2_data_0 = (int4)(src2.x, src2.y, src2.z, src2.x);
- int4 src2_data_1 = (int4)(src2.y, src2.z, src2.x, src2.y);
- int4 src2_data_2 = (int4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = convert_uchar4_sat(convert_int4_sat(src1_data_0) + src2_data_0);
- uchar4 tmp_data_1 = convert_uchar4_sat(convert_int4_sat(src1_data_1) + src2_data_1);
- uchar4 tmp_data_2 = convert_uchar4_sat(convert_int4_sat(src1_data_2) + src2_data_2);
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-__kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = convert_ushort2_sat(convert_int2_sat(src1_data_0) + src2_data_0);
- ushort2 tmp_data_1 = convert_ushort2_sat(convert_int2_sat(src1_data_1) + src2_data_1);
- ushort2 tmp_data_2 = convert_ushort2_sat(convert_int2_sat(src1_data_2) + src2_data_2);
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = convert_short2_sat(convert_int2_sat(src1_data_0) + src2_data_0);
- short2 tmp_data_1 = convert_short2_sat(convert_int2_sat(src1_data_1) + src2_data_1);
- short2 tmp_data_2 = convert_short2_sat(convert_int2_sat(src1_data_2) + src2_data_2);
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = convert_int_sat((long)src1_data_0 + (long)src2_data_0);
- int tmp_data_1 = convert_int_sat((long)src1_data_1 + (long)src2_data_1);
- int tmp_data_2 = convert_int_sat((long)src1_data_2 + (long)src2_data_2);
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- float4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- float src1_data_0 = *((__global float *)((__global char *)src1 + src1_index + 0));
- float src1_data_1 = *((__global float *)((__global char *)src1 + src1_index + 4));
- float src1_data_2 = *((__global float *)((__global char *)src1 + src1_index + 8));
-
- float src2_data_0 = src2.x;
- float src2_data_1 = src2.y;
- float src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- float data_0 = *((__global float *)((__global char *)dst + dst_index + 0));
- float data_1 = *((__global float *)((__global char *)dst + dst_index + 4));
- float data_2 = *((__global float *)((__global char *)dst + dst_index + 8));
-
- float tmp_data_0 = src1_data_0 + src2_data_0;
- float tmp_data_1 = src1_data_1 + src2_data_1;
- float tmp_data_2 = src1_data_2 + src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_add_with_mask_C3_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- double4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- double src1_data_0 = *((__global double *)((__global char *)src1 + src1_index + 0 ));
- double src1_data_1 = *((__global double *)((__global char *)src1 + src1_index + 8 ));
- double src1_data_2 = *((__global double *)((__global char *)src1 + src1_index + 16));
-
- double src2_data_0 = src2.x;
- double src2_data_1 = src2.y;
- double src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- double data_0 = *((__global double *)((__global char *)dst + dst_index + 0 ));
- double data_1 = *((__global double *)((__global char *)dst + dst_index + 8 ));
- double data_2 = *((__global double *)((__global char *)dst + dst_index + 16));
-
- double tmp_data_0 = src1_data_0 + src2_data_0;
- double tmp_data_1 = src1_data_1 + src2_data_1;
- double tmp_data_2 = src1_data_2 + src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-
__kernel void arithm_s_add_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
{
-
int x = get_global_id(0);
int y = get_global_id(1);
uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- uchar4 data = convert_uchar4_sat(convert_int4_sat(src_data1) + src2);
+ uchar4 data = convert_uchar4_sat(ARITHM_OP(convert_int4_sat(src_data1), src2));
data = mask_data ? data : dst_data;
*((__global uchar4 *)(dst + dst_index)) = data;
ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- ushort4 data = convert_ushort4_sat(convert_int4_sat(src_data1) + src2);
+ ushort4 data = convert_ushort4_sat(ARITHM_OP(convert_int4_sat(src_data1), src2));
data = mask_data ? data : dst_data;
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- short4 data = convert_short4_sat(convert_int4_sat(src_data1) + src2);
+ short4 data = convert_short4_sat(ARITHM_OP(convert_int4_sat(src_data1), src2));
data = mask_data ? data : dst_data;
*((__global short4 *)((__global char *)dst + dst_index)) = data;
int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
- int4 data = convert_int4_sat(convert_long4_sat(src_data1) + convert_long4_sat(src2));
+ int4 data = convert_int4_sat(ARITHM_OP(convert_long4_sat(src_data1), convert_long4_sat(src2)));
data = mask_data ? data : dst_data;
*((__global int4 *)((__global char *)dst + dst_index)) = data;
float4 src_data1 = *((__global float4 *)((__global char *)src1 + src1_index));
float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
- float4 data = src_data1 + src2;
+ float4 data = ARITHM_OP(src_data1, src2);
data = mask_data ? data : dst_data;
*((__global float4 *)((__global char *)dst + dst_index)) = data;
double4 src_data1 = *((__global double4 *)((__global char *)src1 + src1_index));
double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
- double4 data = src_data1 + src2;
+ double4 data = ARITHM_OP(src_data1, src2);
data = mask_data ? data : dst_data;
*((__global double4 *)((__global char *)dst + dst_index)) = data;
+++ /dev/null
-////////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************and with scalar without mask**************************************/
-__kernel void arithm_s_bitwise_and_C1_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.x, src2.x, src2.x);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data & src2_data;
-
- data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_C1_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.x, src2.x, src2.x);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
-
- data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_and_C1_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort2 src2_data = (ushort2)(src2.x, src2.x);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data & src2_data;
-
- data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
- data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C1_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- short2 src2_data = (short2)(src2.x, src2.x);
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
-
- short2 tmp_data = src1_data & src2_data;
-
- data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
- data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C1_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = src2.x;
-
- int data = src_data1 & src_data2;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C1_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- char4 src1_data = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 src2_data = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
-
- char4 data = *((__global char4 *)((__global char *)dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
-
- data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C1_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- short4 src1_data = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 src2_data = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
-
- short4 tmp_data = src1_data & src2_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = tmp_data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_and_C2_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.y, src2.x, src2.y);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data & src2_data;
-
-
- data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
- data.zw = (dst_index + 2 < dst_end ) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_C2_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.y, src2.x, src2.y);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
-
- data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
- data.zw = (dst_index + 2 < dst_end ) ? tmp_data.zw : data.zw;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_and_C2_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- ushort2 src_data2 = (ushort2)(src2.x, src2.y);
-
- ushort2 data = src_data1 & src_data2;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C2_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- short2 src_data2 = (short2)(src2.x, src2.y);
-
- short2 data = src_data1 & src_data2;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C2_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
-
- int2 data = src_data1 & src_data2;
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C2_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- char8 src1_data = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src2_data = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
-
- char8 tmp_data = src1_data & src2_data;
-
- *((__global char8 *)((__global char *)dst + dst_index)) = tmp_data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C2_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- short8 src1_data = *((__global short8 *)((__global char *)src1 + src1_index));
- short8 src2_data = (short8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
-
- short8 tmp_data = src1_data & src2_data;
-
- *((__global short8 *)((__global char *)dst + dst_index)) = tmp_data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_and_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = (uchar4)(src2.x, src2.y, src2.z, src2.x);
- uchar4 src2_data_1 = (uchar4)(src2.y, src2.z, src2.x, src2.y);
- uchar4 src2_data_2 = (uchar4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 & src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 & src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = (char4)(src2.x, src2.y, src2.z, src2.x);
- char4 src2_data_1 = (char4)(src2.y, src2.z, src2.x, src2.y);
- char4 src2_data_2 = (char4)(src2.z, src2.x, src2.y, src2.z);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = convert_char4_sat(convert_uchar4_sat(src1_data_0) & convert_uchar4_sat(src2_data_0));
- char4 tmp_data_1 = convert_char4_sat(convert_uchar4_sat(src1_data_1) & convert_uchar4_sat(src2_data_1));
- char4 tmp_data_2 = convert_char4_sat(convert_uchar4_sat(src1_data_2) & convert_uchar4_sat(src2_data_2));
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_s_bitwise_and_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = (ushort2)(src2.x, src2.y);
- ushort2 src2_data_1 = (ushort2)(src2.z, src2.x);
- ushort2 src2_data_2 = (ushort2)(src2.y, src2.z);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 & src2_data_0;
- ushort2 tmp_data_1 = src1_data_1 & src2_data_1;
- ushort2 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_and_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = (short2)(src2.x, src2.y);
- short2 src2_data_1 = (short2)(src2.z, src2.x);
- short2 src2_data_2 = (short2)(src2.y, src2.z);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 & src2_data_0;
- short2 tmp_data_1 = src1_data_1 & src2_data_1;
- short2 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_and_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 & src2_data_0;
- int tmp_data_1 = src1_data_1 & src2_data_1;
- int tmp_data_2 = src1_data_2 & src2_data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-__kernel void arithm_s_bitwise_and_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 src2_data_1 = (char4)(src2.s4, src2.s5, src2.s6, src2.s7);
- char4 src2_data_2 = (char4)(src2.s8, src2.s9, src2.sA, src2.sB);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 & src2_data_0;
- char4 tmp_data_1 = src1_data_1 & src2_data_1;
- char4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C3_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0 ));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8 ));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
-
- short4 data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0 ));
- short4 data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8 ));
- short4 data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
-
- short4 tmp_data_0 = src1_data_0 & src2_data_0;
- short4 tmp_data_1 = src1_data_1 & src2_data_1;
- short4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_and_C4_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
-
- uchar4 data = src_data1 & src2;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_C4_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- char4 src_data1 = *((__global char4 *)(src1 + src1_index));
-
- char4 data = src_data1 & src2;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_and_C4_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
-
- ushort4 data = src_data1 & src2;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C4_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
-
- short4 data = src_data1 & src2;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C4_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
-
- int4 data = src_data1 & src2;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_C4_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- char16 src1_data = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src2_data = (char16)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7,
- src2.s8, src2.s9, src2.sa, src2.sb, src2.sc, src2.sd, src2.se, src2.sf);
-
- char16 tmp_data = src1_data & src2_data;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = tmp_data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_C4_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
- short4 src1_data_3 = *((__global short4 *)((__global char *)src1 + src1_index + 24));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
- short4 src2_data_3 = (short4)(src2.sc, src2.sd, src2.se, src2.sf);
-
- short4 tmp_data_0 = src1_data_0 & src2_data_0;
- short4 tmp_data_1 = src1_data_1 & src2_data_1;
- short4 tmp_data_2 = src1_data_2 & src2_data_2;
- short4 tmp_data_3 = src1_data_3 & src2_data_3;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
-
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other GpuMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_and with scalar with mask**************************************/
-__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.x, src2.x, src2.x);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data & src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.x, src2.x, src2.x);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort2 src2_data = (ushort2)(src2.x, src2.x);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data & src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- short2 src2_data = (short2)(src2.x, src2.x);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data & src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = src2.x;
- int dst_data = *((__global int *)((__global char *)dst + dst_index));
-
- int data = src_data1 & src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src1_data = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 src2_data = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 dst_data = *((__global char4 *)((__global char *)dst + dst_index));
-
- char4 data = src1_data & src2_data;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src1_data = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 src2_data = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- short4 data = src1_data & src2_data;
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.y, src2.x, src2.y);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data & src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.y, src2.x, src2.y);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- ushort2 src_data2 = (ushort2)(src2.x, src2.y);
- ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
-
- ushort2 data = src_data1 & src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- short2 src_data2 = (short2)(src2.x, src2.y);
- short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
-
- short2 data = src_data1 & src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
-
- int2 data = src_data1 & src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src1_data = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src2_data = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
- char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
-
- char8 data = src1_data & src2_data;
-
- data = mask_data ? data : dst_data;
-
- *((__global char8 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short8 src1_data = *((__global short8 *)((__global char *)src1 + src1_index));
- short8 src2_data = (short8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
- short8 dst_data = *((__global short8 *)((__global char *)dst + dst_index));
-
- short8 data = src1_data & src2_data;
- data = mask_data ? data : dst_data;
-
- *((__global short8 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = (uchar4)(src2.x, src2.y, src2.z, src2.x);
- uchar4 src2_data_1 = (uchar4)(src2.y, src2.z, src2.x, src2.y);
- uchar4 src2_data_2 = (uchar4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 & src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 & src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = (char4)(src2.x, src2.y, src2.z, src2.x);
- char4 src2_data_1 = (char4)(src2.y, src2.z, src2.x, src2.y);
- char4 src2_data_2 = (char4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 & src2_data_0;
- char4 tmp_data_1 = src1_data_1 & src2_data_1;
- char4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = (ushort2)(src2.x, src2.y);
- ushort2 src2_data_1 = (ushort2)(src2.z, src2.x);
- ushort2 src2_data_2 = (ushort2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 & src2_data_0;
- ushort2 tmp_data_1 = src1_data_1 & src2_data_1;
- ushort2 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = (short2)(src2.x, src2.y);
- short2 src2_data_1 = (short2)(src2.z, src2.x);
- short2 src2_data_2 = (short2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 & src2_data_0;
- short2 tmp_data_1 = src1_data_1 & src2_data_1;
- short2 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 & src2_data_0;
- int tmp_data_1 = src1_data_1 & src2_data_1;
- int tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 src2_data_1 = (char4)(src2.s4, src2.s5, src2.s6, src2.s7);
- char4 src2_data_2 = (char4)(src2.s8, src2.s9, src2.sA, src2.sB);
-
- uchar mask_data = * (mask + mask_index);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 & src2_data_0;
- char4 tmp_data_1 = src1_data_1 & src2_data_1;
- char4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0 ));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8 ));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
-
- uchar mask_data = * (mask + mask_index);
-
- short4 data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0 ));
- short4 data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8 ));
- short4 data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
-
- short4 tmp_data_0 = src1_data_0 & src2_data_0;
- short4 tmp_data_1 = src1_data_1 & src2_data_1;
- short4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
-
- uchar4 data = src_data1 & src2;
- data = mask_data ? data : dst_data;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src_data1 = *((__global char4 *)(src1 + src1_index));
- char4 dst_data = *((__global char4 *)(dst + dst_index));
-
- char4 data = src_data1 & src2;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
-
- ushort4 data = src_data1 & src2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- short4 data = src_data1 & src2;
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
-
- int4 data = src_data1 & src2;
- data = mask_data ? data : dst_data;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char16 src1_data = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src2_data = (char16)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7,
- src2.s8, src2.s9, src2.sa, src2.sb, src2.sc, src2.sd, src2.se, src2.sf);
- char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
-
- char16 data = src1_data & src2_data;
- data = mask_data ? data : dst_data;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
- short4 src1_data_3 = *((__global short4 *)((__global char *)src1 + src1_index + 24));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
- short4 src2_data_3 = (short4)(src2.sc, src2.sd, src2.se, src2.sf);
-
- short4 dst_data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0));
- short4 dst_data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8));
- short4 dst_data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
- short4 dst_data_3 = *((__global short4 *)((__global char *)dst + dst_index + 24));
-
- short4 data_0 = src1_data_0 & src2_data_0;
- short4 data_1 = src1_data_1 & src2_data_1;
- short4 data_2 = src1_data_2 & src2_data_2;
- short4 data_3 = src1_data_3 & src2_data_3;
-
- data_0 = mask_data ? data_0 : dst_data_0;
- data_1 = mask_data ? data_1 : dst_data_1;
- data_2 = mask_data ? data_2 : dst_data_2;
- data_3 = mask_data ? data_3 : dst_data_3;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0)) = data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8)) = data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16)) = data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 24)) = data_3;
- }
-}
-#endif
-
//
// @Authors
// Jiang Liyuan, jlyuan001.good@163.com
+// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#endif
#endif
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_and without mask**************************************/
-__kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int src1_offset,
+//bitwise_binary without mask for and, or, xor operators
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////bitwise_binary///////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef OP_BINARY
+#define OP_BINARY &
+#endif
+
+__kernel void arithm_bitwise_binary_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
}
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data & src2_data;
+ uchar4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
}
-__kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src1_offset,
+__kernel void arithm_bitwise_binary_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
char4 dst_data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
}
-__kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int src1_offset,
+__kernel void arithm_bitwise_binary_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- ushort4 tmp_data = src1_data & src2_data;
+ ushort4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
-__kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int src1_offset,
+__kernel void arithm_bitwise_binary_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
}
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- short4 tmp_data = src1_data & src2_data;
+ short4 tmp_data = src1_data OP_BINARY src2_data;
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
-__kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1_offset,
+__kernel void arithm_bitwise_binary_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int data1 = *((__global int *)((__global char *)src1 + src1_index));
int data2 = *((__global int *)((__global char *)src2 + src2_index));
- int tmp = data1 & data2;
+ int tmp = data1 OP_BINARY data2;
*((__global int *)((__global char *)dst + dst_index)) = tmp;
}
}
-__kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src1_offset,
+__kernel void arithm_bitwise_binary_D5 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
char4 data1 = *((__global char4 *)((__global char *)src1 + src1_index));
char4 data2 = *((__global char4 *)((__global char *)src2 + src2_index));
- char4 tmp = data1 & data2;
+ char4 tmp = data1 OP_BINARY data2;
*((__global char4 *)((__global char *)dst + dst_index)) = tmp;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src1_offset,
+__kernel void arithm_bitwise_binary_D6 (__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
char8 data1 = *((__global char8 *)((__global char *)src1 + src1_index));
char8 data2 = *((__global char8 *)((__global char *)src2 + src2_index));
- *((__global char8 *)((__global char *)dst + dst_index)) = data1 & data2;
+ *((__global char8 *)((__global char *)dst + dst_index)) = data1 OP_BINARY data2;
}
}
#endif
//
// @Authors
// Jiang Liyuan, jlyuan001.good@163.com
+// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
+
+#ifndef OP_BINARY
+#define OP_BINARY &
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_and with mask**************************************/
-__kernel void arithm_bitwise_and_with_mask_C1_D0 (
+/**************************************bitwise_binary with mask**************************************/
+__kernel void arithm_bitwise_binary_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 mask_data = vload4(0, mask + mask_index);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data & src2_data;
+ uchar4 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
-__kernel void arithm_bitwise_and_with_mask_C1_D1 (
+__kernel void arithm_bitwise_binary_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 mask_data = vload4(0, mask + mask_index);
char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
data.x = convert_char((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_char((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
-__kernel void arithm_bitwise_and_with_mask_C1_D2 (
+__kernel void arithm_bitwise_binary_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data & src2_data;
+ ushort2 tmp_data = src1_data OP_BINARY src2_data;
data.x = convert_ushort((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_ushort((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-__kernel void arithm_bitwise_and_with_mask_C1_D3 (
+__kernel void arithm_bitwise_binary_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data & src2_data;
+ short2 tmp_data = src1_data OP_BINARY src2_data;
data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-__kernel void arithm_bitwise_and_with_mask_C1_D4 (
+__kernel void arithm_bitwise_binary_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
int src_data2 = *((__global int *)((__global char *)src2 + src2_index));
int dst_data = *((__global int *)((__global char *)dst + dst_index));
- int data = src_data1 & src_data2;
+ int data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global int *)((__global char *)dst + dst_index)) = data;
-__kernel void arithm_bitwise_and_with_mask_C1_D5 (
+__kernel void arithm_bitwise_binary_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src_data2 = *((__global char4 *)((__global char *)src2 + src2_index));
char4 dst_data = *((__global char4 *)((__global char *)dst + dst_index));
- char4 data = src_data1 & src_data2;
+ char4 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global char4 *)((__global char *)dst + dst_index)) = data;
-__kernel void arithm_bitwise_and_with_mask_C1_D6 (
+__kernel void arithm_bitwise_binary_with_mask_C1_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
char8 src_data2 = *((__global char8 *)((__global char *)src2 + src2_index));
char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
- char8 data = src_data1 & src_data2;
+ char8 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global char8 *)((__global char *)dst + dst_index)) = data;
-__kernel void arithm_bitwise_and_with_mask_C2_D0 (
+__kernel void arithm_bitwise_binary_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data & src2_data;
+ uchar4 tmp_data = src1_data OP_BINARY src2_data;
data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
}
-__kernel void arithm_bitwise_and_with_mask_C2_D1 (
+__kernel void arithm_bitwise_binary_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data & src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D2 (
+__kernel void arithm_bitwise_binary_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort2 src_data2 = *((__global ushort2 *)((__global char *)src2 + src2_index));
ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
- ushort2 data = src_data1 & src_data2;
+ ushort2 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D3 (
+__kernel void arithm_bitwise_binary_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
short2 src_data2 = *((__global short2 *)((__global char *)src2 + src2_index));
short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
- short2 data = src_data1 & src_data2;
+ short2 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D4 (
+__kernel void arithm_bitwise_binary_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
int2 src_data2 = *((__global int2 *)((__global char *)src2 + src2_index));
int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
- int2 data = src_data1 & src_data2;
+ int2 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D5 (
+__kernel void arithm_bitwise_binary_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
char8 src_data2 = *((__global char8 *)((__global char *)src2 + src2_index));
char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
- char8 data = src_data1 & src_data2;
+ char8 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global char8 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C2_D6 (
+__kernel void arithm_bitwise_binary_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src_data2 = *((__global char16 *)((__global char *)src2 + src2_index));
char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
- char16 data = src_data1 & src_data2;
+ char16 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global char16 *)((__global char *)dst + dst_index)) = data;
}
-
-__kernel void arithm_bitwise_and_with_mask_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- uchar4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- uchar4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 & src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 & src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_bitwise_and_with_mask_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- char4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- char4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 & src2_data_0;
- char4 tmp_data_1 = src1_data_1 & src2_data_1;
- char4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_bitwise_and_with_mask_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 0));
- ushort2 src2_data_1 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 4));
- ushort2 src2_data_2 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 & src2_data_0 ;
- ushort2 tmp_data_1 = src1_data_1 & src2_data_1 ;
- ushort2 tmp_data_2 = src1_data_2 & src2_data_2 ;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_and_with_mask_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 0));
- short2 src2_data_1 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 4));
- short2 src2_data_2 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 & src2_data_0 ;
- short2 tmp_data_1 = src1_data_1 & src2_data_1 ;
- short2 tmp_data_2 = src1_data_2 & src2_data_2 ;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_and_with_mask_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = *((__global int *)((__global char *)src2 + src2_index + 0));
- int src2_data_1 = *((__global int *)((__global char *)src2 + src2_index + 4));
- int src2_data_2 = *((__global int *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 & src2_data_0 ;
- int tmp_data_1 = src1_data_1 & src2_data_1 ;
- int tmp_data_2 = src1_data_2 & src2_data_2 ;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_and_with_mask_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = *((__global char4 *)((__global char *)src2 + src2_index + 0));
- char4 src2_data_1 = *((__global char4 *)((__global char *)src2 + src2_index + 4));
- char4 src2_data_2 = *((__global char4 *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 & src2_data_0;
- char4 tmp_data_1 = src1_data_1 & src2_data_1;
- char4 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_and_with_mask_C3_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 24) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- char8 src1_data_0 = *((__global char8 *)((__global char *)src1 + src1_index + 0 ));
- char8 src1_data_1 = *((__global char8 *)((__global char *)src1 + src1_index + 8 ));
- char8 src1_data_2 = *((__global char8 *)((__global char *)src1 + src1_index + 16));
-
- char8 src2_data_0 = *((__global char8 *)((__global char *)src2 + src2_index + 0 ));
- char8 src2_data_1 = *((__global char8 *)((__global char *)src2 + src2_index + 8 ));
- char8 src2_data_2 = *((__global char8 *)((__global char *)src2 + src2_index + 16));
-
- uchar mask_data = * (mask + mask_index);
-
- char8 data_0 = *((__global char8 *)((__global char *)dst + dst_index + 0 ));
- char8 data_1 = *((__global char8 *)((__global char *)dst + dst_index + 8 ));
- char8 data_2 = *((__global char8 *)((__global char *)dst + dst_index + 16));
-
- char8 tmp_data_0 = src1_data_0 & src2_data_0;
- char8 tmp_data_1 = src1_data_1 & src2_data_1;
- char8 tmp_data_2 = src1_data_2 & src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-
-
-__kernel void arithm_bitwise_and_with_mask_C4_D0 (
+__kernel void arithm_bitwise_binary_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src_data2 = *((__global uchar4 *)(src2 + src2_index));
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- uchar4 data = src_data1 & src_data2;
+ uchar4 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global uchar4 *)(dst + dst_index)) = data;
}
-__kernel void arithm_bitwise_and_with_mask_C4_D1 (
+__kernel void arithm_bitwise_binary_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src_data2 = *((__global char4 *)(src2 + src2_index));
char4 dst_data = *((__global char4 *)(dst + dst_index));
- char4 data = src_data1 & src_data2;
+ char4 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global char4 *)(dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D2 (
+__kernel void arithm_bitwise_binary_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src_data2 = *((__global ushort4 *)((__global char *)src2 + src2_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- ushort4 data = src_data1 & src_data2;
+ ushort4 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D3 (
+__kernel void arithm_bitwise_binary_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src_data2 = *((__global short4 *)((__global char *)src2 + src2_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- short4 data = src_data1 & src_data2;
+ short4 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D4 (
+__kernel void arithm_bitwise_binary_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src_data2 = *((__global int4 *)((__global char *)src2 + src2_index));
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
- int4 data = src_data1 & src_data2;
+ int4 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_bitwise_and_with_mask_C4_D5 (
+__kernel void arithm_bitwise_binary_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src_data2 = *((__global char16 *)((__global char *)src2 + src2_index));
char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
- char16 data = src_data1 & src_data2;
+ char16 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global char16 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_and_with_mask_C4_D6 (
+__kernel void arithm_bitwise_binary_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
char8 dst_data_2 = *((__global char8 *)((__global char *)dst + dst_index + 16));
char8 dst_data_3 = *((__global char8 *)((__global char *)dst + dst_index + 24));
- char8 data_0 = src_data1_0 & src_data2_0;
- char8 data_1 = src_data1_1 & src_data2_1;
- char8 data_2 = src_data1_2 & src_data2_2;
- char8 data_3 = src_data1_3 & src_data2_3;
+ char8 data_0 = src_data1_0 OP_BINARY src_data2_0;
+ char8 data_1 = src_data1_1 OP_BINARY src_data2_1;
+ char8 data_2 = src_data1_2 OP_BINARY src_data2_2;
+ char8 data_3 = src_data1_3 OP_BINARY src_data2_3;
data_0 = mask_data ? data_0 : dst_data_0;
data_1 = mask_data ? data_1 : dst_data_1;
//
// @Authors
// Jiang Liyuan, jlyuan001.good@163.com
+// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
+
+#ifndef OP_BINARY
+#define OP_BINARY &
+#endif
+
///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************xor with scalar without mask**************************************/
-__kernel void arithm_s_bitwise_xor_C1_D0 (
+////////////////////////////////////////////bitwise_binary/////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+/******************************bitwise binary with scalar without mask********************************/
+__kernel void arithm_s_bitwise_binary_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
uchar4 src2_data = (uchar4)(src2.x, src2.x, src2.x, src2.x);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data ^ src2_data;
+ uchar4 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
}
-__kernel void arithm_s_bitwise_xor_C1_D1 (
+__kernel void arithm_s_bitwise_binary_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
char4 src2_data = (char4)(src2.x, src2.x, src2.x, src2.x);
char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
}
}
-__kernel void arithm_s_bitwise_xor_C1_D2 (
+__kernel void arithm_s_bitwise_binary_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
ushort2 src2_data = (ushort2)(src2.x, src2.x);
ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data ^ src2_data;
+ ushort2 tmp_data = src1_data OP_BINARY src2_data;
data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C1_D3 (
+__kernel void arithm_s_bitwise_binary_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
short2 src2_data = (short2)(src2.x, src2.x);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data ^ src2_data;
+ short2 tmp_data = src1_data OP_BINARY src2_data;
data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C1_D4 (
+__kernel void arithm_s_bitwise_binary_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
int src_data2 = src2.x;
- int data = src_data1 ^ src_data2;
+ int data = src_data1 OP_BINARY src_data2;
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C1_D5 (
+__kernel void arithm_s_bitwise_binary_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
char4 src2_data = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
char4 data = *((__global char4 *)((__global char *)dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
}
-
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C1_D6 (
+__kernel void arithm_s_bitwise_binary_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
short4 src1_data = *((__global short4 *)((__global char *)src1 + src1_index));
short4 src2_data = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 tmp_data = src1_data ^ src2_data;
+ short4 tmp_data = src1_data OP_BINARY src2_data;
*((__global short4 *)((__global char *)dst + dst_index)) = tmp_data;
}
}
#endif
-__kernel void arithm_s_bitwise_xor_C2_D0 (
+__kernel void arithm_s_bitwise_binary_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
uchar4 src2_data = (uchar4)(src2.x, src2.y, src2.x, src2.y);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data ^ src2_data;
+ uchar4 tmp_data = src1_data OP_BINARY src2_data;
data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
}
-__kernel void arithm_s_bitwise_xor_C2_D1 (
+__kernel void arithm_s_bitwise_binary_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
char4 src2_data = (char4)(src2.x, src2.y, src2.x, src2.y);
char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
data.zw = (dst_index + 2 < dst_end ) ? tmp_data.zw : data.zw;
}
}
-__kernel void arithm_s_bitwise_xor_C2_D2 (
+__kernel void arithm_s_bitwise_binary_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
ushort2 src_data2 = (ushort2)(src2.x, src2.y);
- ushort2 data = src_data1 ^ src_data2;
+ ushort2 data = src_data1 OP_BINARY src_data2;
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C2_D3 (
+__kernel void arithm_s_bitwise_binary_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
short2 src_data2 = (short2)(src2.x, src2.y);
- short2 data = src_data1 ^ src_data2;
+ short2 data = src_data1 OP_BINARY src_data2;
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C2_D4 (
+__kernel void arithm_s_bitwise_binary_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
int2 src_data2 = (int2)(src2.x, src2.y);
- int2 data = src_data1 ^ src_data2;
+ int2 data = src_data1 OP_BINARY src_data2;
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C2_D5 (
+__kernel void arithm_s_bitwise_binary_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
char8 src1_data = *((__global char8 *)((__global char *)src1 + src1_index));
char8 src2_data = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
- char8 tmp_data = src1_data ^ src2_data;
+ char8 tmp_data = src1_data OP_BINARY src2_data;
*((__global char8 *)((__global char *)dst + dst_index)) = tmp_data;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C2_D6 (
+__kernel void arithm_s_bitwise_binary_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
short8 src1_data = *((__global short8 *)((__global char *)src1 + src1_index));
short8 src2_data = (short8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
- short8 tmp_data = src1_data ^ src2_data;
+ short8 tmp_data = src1_data OP_BINARY src2_data;
*((__global short8 *)((__global char *)dst + dst_index)) = tmp_data;
}
}
#endif
-__kernel void arithm_s_bitwise_xor_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = (uchar4)(src2.x, src2.y, src2.z, src2.x);
- uchar4 src2_data_1 = (uchar4)(src2.y, src2.z, src2.x, src2.y);
- uchar4 src2_data_2 = (uchar4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_s_bitwise_xor_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = (char4)(src2.x, src2.y, src2.z, src2.x);
- char4 src2_data_1 = (char4)(src2.y, src2.z, src2.x, src2.y);
- char4 src2_data_2 = (char4)(src2.z, src2.x, src2.y, src2.z);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_s_bitwise_xor_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = (ushort2)(src2.x, src2.y);
- ushort2 src2_data_1 = (ushort2)(src2.z, src2.x);
- ushort2 src2_data_2 = (ushort2)(src2.y, src2.z);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 ^ src2_data_0;
- ushort2 tmp_data_1 = src1_data_1 ^ src2_data_1;
- ushort2 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_xor_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = (short2)(src2.x, src2.y);
- short2 src2_data_1 = (short2)(src2.z, src2.x);
- short2 src2_data_2 = (short2)(src2.y, src2.z);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 ^ src2_data_0;
- short2 tmp_data_1 = src1_data_1 ^ src2_data_1;
- short2 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_xor_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 ^ src2_data_0;
- int tmp_data_1 = src1_data_1 ^ src2_data_1;
- int tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-__kernel void arithm_s_bitwise_xor_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 src2_data_1 = (char4)(src2.s4, src2.s5, src2.s6, src2.s7);
- char4 src2_data_2 = (char4)(src2.s8, src2.s9, src2.sA, src2.sB);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C3_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0 ));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8 ));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
-
- short4 data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0 ));
- short4 data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8 ));
- short4 data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
-
- short4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- short4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- short4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_xor_C4_D0 (
+__kernel void arithm_s_bitwise_binary_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 data = src_data1 ^ src2;
+ uchar4 data = src_data1 OP_BINARY src2;
*((__global uchar4 *)(dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D1 (
+__kernel void arithm_s_bitwise_binary_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
char4 src_data1 = *((__global char4 *)(src1 + src1_index));
- char4 data = src_data1 ^ src2;
+ char4 data = src_data1 OP_BINARY src2;
*((__global char4 *)(dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D2 (
+__kernel void arithm_s_bitwise_binary_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 data = src_data1 ^ src2;
+ ushort4 data = src_data1 OP_BINARY src2;
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D3 (
+__kernel void arithm_s_bitwise_binary_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 data = src_data1 ^ src2;
+ short4 data = src_data1 OP_BINARY src2;
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D4 (
+__kernel void arithm_s_bitwise_binary_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 data = src_data1 ^ src2;
+ int4 data = src_data1 OP_BINARY src2;
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_C4_D5 (
+__kernel void arithm_s_bitwise_binary_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
char16 src2_data = (char16)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7,
src2.s8, src2.s9, src2.sa, src2.sb, src2.sc, src2.sd, src2.se, src2.sf);
- char16 tmp_data = src1_data ^ src2_data;
+ char16 tmp_data = src1_data OP_BINARY src2_data;
*((__global char16 *)((__global char *)dst + dst_index)) = tmp_data;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_C4_D6 (
+__kernel void arithm_s_bitwise_binary_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
short4 src2_data_3 = (short4)(src2.sc, src2.sd, src2.se, src2.sf);
- short4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- short4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- short4 tmp_data_2 = src1_data_2 ^ src2_data_2;
- short4 tmp_data_3 = src1_data_3 ^ src2_data_3;
+ short4 tmp_data_0 = src1_data_0 OP_BINARY src2_data_0;
+ short4 tmp_data_1 = src1_data_1 OP_BINARY src2_data_1;
+ short4 tmp_data_2 = src1_data_2 OP_BINARY src2_data_2;
+ short4 tmp_data_3 = src1_data_3 OP_BINARY src2_data_3;
*((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
*((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
}
}
-#endif
\ No newline at end of file
+#endif
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
-
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
+
+#ifndef OP_BINARY
+#define OP_BINARY &
+#endif
+
//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_xor with scalar with mask**************************************/
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (
+////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+/**************************************bitwise_binary with scalar with mask**************************************/
+__kernel void arithm_s_bitwise_binary_with_mask_C1_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 mask_data = vload4(0, mask + mask_index);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data ^ src2_data;
+ uchar4 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (
+__kernel void arithm_s_bitwise_binary_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 mask_data = vload4(0, mask + mask_index);
char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (
+__kernel void arithm_s_bitwise_binary_with_mask_C1_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data ^ src2_data;
+ ushort2 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (
+__kernel void arithm_s_bitwise_binary_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data ^ src2_data;
+ short2 tmp_data = src1_data OP_BINARY src2_data;
data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
*((__global short2 *)((__global uchar *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (
+__kernel void arithm_s_bitwise_binary_with_mask_C1_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int src_data2 = src2.x;
int dst_data = *((__global int *)((__global char *)dst + dst_index));
- int data = src_data1 ^ src_data2;
+ int data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (
+__kernel void arithm_s_bitwise_binary_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2_data = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
char4 dst_data = *((__global char4 *)((__global char *)dst + dst_index));
- char4 data = src1_data ^ src2_data;
+ char4 data = src1_data OP_BINARY src2_data;
data = mask_data ? data : dst_data;
*((__global char4 *)((__global char *)dst + dst_index)) = data;
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (
+__kernel void arithm_s_bitwise_binary_with_mask_C1_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2_data = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- short4 data = src1_data ^ src2_data;
+ short4 data = src1_data OP_BINARY src2_data;
data = mask_data ? data : dst_data;
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
#endif
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (
+__kernel void arithm_s_bitwise_binary_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data ^ src2_data;
+ uchar4 tmp_data = src1_data OP_BINARY src2_data;
data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (
+__kernel void arithm_s_bitwise_binary_with_mask_C2_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
uchar2 mask_data = vload2(0, mask + mask_index);
char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
+ char4 tmp_data = src1_data OP_BINARY src2_data;
data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (
+__kernel void arithm_s_bitwise_binary_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort2 src_data2 = (ushort2)(src2.x, src2.y);
ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
- ushort2 data = src_data1 ^ src_data2;
+ ushort2 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global ushort2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (
+__kernel void arithm_s_bitwise_binary_with_mask_C2_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short2 src_data2 = (short2)(src2.x, src2.y);
short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
- short2 data = src_data1 ^ src_data2;
+ short2 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (
+__kernel void arithm_s_bitwise_binary_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int2 src_data2 = (int2)(src2.x, src2.y);
int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
- int2 data = src_data1 ^ src_data2;
+ int2 data = src_data1 OP_BINARY src_data2;
data = mask_data ? data : dst_data;
*((__global int2 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (
+__kernel void arithm_s_bitwise_binary_with_mask_C2_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char8 src2_data = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
- char8 data = src1_data ^ src2_data;
+ char8 data = src1_data OP_BINARY src2_data;
data = mask_data ? data : dst_data;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (
+__kernel void arithm_s_bitwise_binary_with_mask_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short8 src2_data = (short8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
short8 dst_data = *((__global short8 *)((__global char *)dst + dst_index));
- short8 data = src1_data ^ src2_data;
+ short8 data = src1_data OP_BINARY src2_data;
data = mask_data ? data : dst_data;
*((__global short8 *)((__global char *)dst + dst_index)) = data;
}
}
#endif
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = (uchar4)(src2.x, src2.y, src2.z, src2.x);
- uchar4 src2_data_1 = (uchar4)(src2.y, src2.z, src2.x, src2.y);
- uchar4 src2_data_2 = (uchar4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = (char4)(src2.x, src2.y, src2.z, src2.x);
- char4 src2_data_1 = (char4)(src2.y, src2.z, src2.x, src2.y);
- char4 src2_data_2 = (char4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = (ushort2)(src2.x, src2.y);
- ushort2 src2_data_1 = (ushort2)(src2.z, src2.x);
- ushort2 src2_data_2 = (ushort2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 ^ src2_data_0;
- ushort2 tmp_data_1 = src1_data_1 ^ src2_data_1;
- ushort2 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = (short2)(src2.x, src2.y);
- short2 src2_data_1 = (short2)(src2.z, src2.x);
- short2 src2_data_2 = (short2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 ^ src2_data_0;
- short2 tmp_data_1 = src1_data_1 ^ src2_data_1;
- short2 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 ^ src2_data_0;
- int tmp_data_1 = src1_data_1 ^ src2_data_1;
- int tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 src2_data_1 = (char4)(src2.s4, src2.s5, src2.s6, src2.s7);
- char4 src2_data_2 = (char4)(src2.s8, src2.s9, src2.sA, src2.sB);
-
- uchar mask_data = * (mask + mask_index);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0 ));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8 ));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
-
- uchar mask_data = * (mask + mask_index);
-
- short4 data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0 ));
- short4 data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8 ));
- short4 data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
-
- short4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- short4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- short4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (
+__kernel void arithm_s_bitwise_binary_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- uchar4 data = src_data1 ^ src2;
+ uchar4 data = src_data1 OP_BINARY src2;
data = mask_data ? data : dst_data;
*((__global uchar4 *)(dst + dst_index)) = data;
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (
+__kernel void arithm_s_bitwise_binary_with_mask_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src_data1 = *((__global char4 *)(src1 + src1_index));
char4 dst_data = *((__global char4 *)(dst + dst_index));
- char4 data = src_data1 ^ src2;
+ char4 data = src_data1 OP_BINARY src2;
data = mask_data ? data : dst_data;
*((__global char4 *)(dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (
+__kernel void arithm_s_bitwise_binary_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- ushort4 data = src_data1 ^ src2;
+ ushort4 data = src_data1 OP_BINARY src2;
data = mask_data ? data : dst_data;
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (
+__kernel void arithm_s_bitwise_binary_with_mask_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- short4 data = src_data1 ^ src2;
+ short4 data = src_data1 OP_BINARY src2;
data = mask_data ? data : dst_data;
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (
+__kernel void arithm_s_bitwise_binary_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
- int4 data = src_data1 ^ src2;
+ int4 data = src_data1 OP_BINARY src2;
data = mask_data ? data : dst_data;
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (
+__kernel void arithm_s_bitwise_binary_with_mask_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
src2.s8, src2.s9, src2.sa, src2.sb, src2.sc, src2.sd, src2.se, src2.sf);
char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
- char16 data = src1_data ^ src2_data;
+ char16 data = src1_data OP_BINARY src2_data;
data = mask_data ? data : dst_data;
*((__global char16 *)((__global char *)dst + dst_index)) = data;
}
}
#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (
+__kernel void arithm_s_bitwise_binary_with_mask_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 dst_data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
short4 dst_data_3 = *((__global short4 *)((__global char *)dst + dst_index + 24));
- short4 data_0 = src1_data_0 ^ src2_data_0;
- short4 data_1 = src1_data_1 ^ src2_data_1;
- short4 data_2 = src1_data_2 ^ src2_data_2;
- short4 data_3 = src1_data_3 ^ src2_data_3;
+ short4 data_0 = src1_data_0 OP_BINARY src2_data_0;
+ short4 data_1 = src1_data_1 OP_BINARY src2_data_1;
+ short4 data_2 = src1_data_2 OP_BINARY src2_data_2;
+ short4 data_3 = src1_data_3 OP_BINARY src2_data_3;
data_0 = mask_data ? data_0 : dst_data_0;
data_1 = mask_data ? data_1 : dst_data_1;
}
}
#endif
+
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_or without mask**************************************/
-__kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- uchar4 src1_data = vload4(0, src1 + src1_index_fix);
- uchar4 src2_data = vload4(0, src2 + src2_index_fix);
- if(src1_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data | src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = dst_data;
- }
-}
-
-
-__kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = vload4(0, src2 + src2_index);
-
- char4 dst_data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data | src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global char4 *)(dst + dst_index)) = dst_data;
- }
-}
-
-
-__kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 3)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
- ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
-
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- ushort4 tmp_data = src1_data | src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = dst_data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 3)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
- short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
- short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
-
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- short4 tmp_data = src1_data | src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = dst_data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_or_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- int data1 = *((__global int *)((__global char *)src1 + src1_index));
- int data2 = *((__global int *)((__global char *)src2 + src2_index));
- int tmp = data1 | data2;
-
- *((__global int *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-
-__kernel void arithm_bitwise_or_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- char4 data1 = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 data2 = *((__global char4 *)((__global char *)src2 + src2_index));
- char4 tmp = data1 | data2;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- char8 data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 data2 = *((__global char8 *)((__global char *)src2 + src2_index));
-
- *((__global char8 *)((__global char *)dst + dst_index)) = data1 | data2;
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_or with mask**************************************/
-__kernel void arithm_bitwise_or_with_mask_C1_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data | src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_or_with_mask_C1_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = vload4(0, src2 + src2_index);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data | src2_data;
-
- data.x = convert_char((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = convert_char((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = convert_char((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = convert_char((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_or_with_mask_C1_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort2 src2_data = vload2(0, (__global ushort *)((__global char *)src2 + src2_index));
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data | src2_data;
-
- data.x = convert_ushort((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = convert_ushort((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_or_with_mask_C1_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data | src2_data;
-
- data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_or_with_mask_C1_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = *((__global int *)((__global char *)src2 + src2_index));
- int dst_data = *((__global int *)((__global char *)dst + dst_index));
-
- int data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_or_with_mask_C1_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src_data1 = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 src_data2 = *((__global char4 *)((__global char *)src2 + src2_index));
- char4 dst_data = *((__global char4 *)((__global char *)dst + dst_index));
-
- char4 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C1_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src_data2 = *((__global char8 *)((__global char *)src2 + src2_index));
- char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
-
- char8 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char8 *)((__global char *)dst + dst_index)) = data;
- }
-
-}
-#endif
-
-
-__kernel void arithm_bitwise_or_with_mask_C2_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data | src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_bitwise_or_with_mask_C2_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = vload4(0, src2 + src2_index);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data | src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_bitwise_or_with_mask_C2_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- ushort2 src_data2 = *((__global ushort2 *)((__global char *)src2 + src2_index));
- ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
-
- ushort2 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C2_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- short2 src_data2 = *((__global short2 *)((__global char *)src2 + src2_index));
- short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
-
- short2 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C2_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = *((__global int2 *)((__global char *)src2 + src2_index));
- int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
-
- int2 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C2_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src_data2 = *((__global char8 *)((__global char *)src2 + src2_index));
- char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
-
- char8 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char8 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C2_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char16 src_data1 = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src_data2 = *((__global char16 *)((__global char *)src2 + src2_index));
- char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
-
- char16 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-
-
-__kernel void arithm_bitwise_or_with_mask_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- uchar4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- uchar4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 | src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 | src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_bitwise_or_with_mask_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- char4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- char4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 | src2_data_0;
- char4 tmp_data_1 = src1_data_1 | src2_data_1;
- char4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_bitwise_or_with_mask_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 0));
- ushort2 src2_data_1 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 4));
- ushort2 src2_data_2 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 | src2_data_0 ;
- ushort2 tmp_data_1 = src1_data_1 | src2_data_1 ;
- ushort2 tmp_data_2 = src1_data_2 | src2_data_2 ;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 0));
- short2 src2_data_1 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 4));
- short2 src2_data_2 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 | src2_data_0 ;
- short2 tmp_data_1 = src1_data_1 | src2_data_1 ;
- short2 tmp_data_2 = src1_data_2 | src2_data_2 ;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = *((__global int *)((__global char *)src2 + src2_index + 0));
- int src2_data_1 = *((__global int *)((__global char *)src2 + src2_index + 4));
- int src2_data_2 = *((__global int *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 | src2_data_0 ;
- int tmp_data_1 = src1_data_1 | src2_data_1 ;
- int tmp_data_2 = src1_data_2 | src2_data_2 ;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = *((__global char4 *)((__global char *)src2 + src2_index + 0));
- char4 src2_data_1 = *((__global char4 *)((__global char *)src2 + src2_index + 4));
- char4 src2_data_2 = *((__global char4 *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 | src2_data_0;
- char4 tmp_data_1 = src1_data_1 | src2_data_1;
- char4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C3_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 24) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- char8 src1_data_0 = *((__global char8 *)((__global char *)src1 + src1_index + 0 ));
- char8 src1_data_1 = *((__global char8 *)((__global char *)src1 + src1_index + 8 ));
- char8 src1_data_2 = *((__global char8 *)((__global char *)src1 + src1_index + 16));
-
- char8 src2_data_0 = *((__global char8 *)((__global char *)src2 + src2_index + 0 ));
- char8 src2_data_1 = *((__global char8 *)((__global char *)src2 + src2_index + 8 ));
- char8 src2_data_2 = *((__global char8 *)((__global char *)src2 + src2_index + 16));
-
- uchar mask_data = * (mask + mask_index);
-
- char8 data_0 = *((__global char8 *)((__global char *)dst + dst_index + 0 ));
- char8 data_1 = *((__global char8 *)((__global char *)dst + dst_index + 8 ));
- char8 data_2 = *((__global char8 *)((__global char *)dst + dst_index + 16));
-
- char8 tmp_data_0 = src1_data_0 | src2_data_0;
- char8 tmp_data_1 = src1_data_1 | src2_data_1;
- char8 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-
-
-__kernel void arithm_bitwise_or_with_mask_C4_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 src_data2 = *((__global uchar4 *)(src2 + src2_index));
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
-
- uchar4 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_bitwise_or_with_mask_C4_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src_data1 = *((__global char4 *)(src1 + src1_index));
- char4 src_data2 = *((__global char4 *)(src2 + src2_index));
- char4 dst_data = *((__global char4 *)(dst + dst_index));
-
- char4 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_bitwise_or_with_mask_C4_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 src_data2 = *((__global ushort4 *)((__global char *)src2 + src2_index));
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
-
- ushort4 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C4_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 src_data2 = *((__global short4 *)((__global char *)src2 + src2_index));
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- short4 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C4_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 src_data2 = *((__global int4 *)((__global char *)src2 + src2_index));
- int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
-
- int4 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_or_with_mask_C4_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char16 src_data1 = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src_data2 = *((__global char16 *)((__global char *)src2 + src2_index));
- char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
-
- char16 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_or_with_mask_C4_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 5) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src_data1_0 = *((__global char8 *)((__global char *)src1 + src1_index + 0));
- char8 src_data1_1 = *((__global char8 *)((__global char *)src1 + src1_index + 8));
- char8 src_data1_2 = *((__global char8 *)((__global char *)src1 + src1_index + 16));
- char8 src_data1_3 = *((__global char8 *)((__global char *)src1 + src1_index + 24));
-
- char8 src_data2_0 = *((__global char8 *)((__global char *)src2 + src2_index + 0));
- char8 src_data2_1 = *((__global char8 *)((__global char *)src2 + src2_index + 8));
- char8 src_data2_2 = *((__global char8 *)((__global char *)src2 + src2_index + 16));
- char8 src_data2_3 = *((__global char8 *)((__global char *)src2 + src2_index + 24));
-
- char8 dst_data_0 = *((__global char8 *)((__global char *)dst + dst_index + 0));
- char8 dst_data_1 = *((__global char8 *)((__global char *)dst + dst_index + 8));
- char8 dst_data_2 = *((__global char8 *)((__global char *)dst + dst_index + 16));
- char8 dst_data_3 = *((__global char8 *)((__global char *)dst + dst_index + 24));
-
- char8 data_0 = src_data1_0 | src_data2_0;
- char8 data_1 = src_data1_1 | src_data2_1;
- char8 data_2 = src_data1_2 | src_data2_2;
- char8 data_3 = src_data1_3 | src_data2_3;
-
- data_0 = mask_data ? data_0 : dst_data_0;
- data_1 = mask_data ? data_1 : dst_data_1;
- data_2 = mask_data ? data_2 : dst_data_2;
- data_3 = mask_data ? data_3 : dst_data_3;
-
- *((__global char8 *)((__global char *)dst + dst_index + 0)) = data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8)) = data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16)) = data_2;
- *((__global char8 *)((__global char *)dst + dst_index + 24)) = data_3;
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************and with scalar without mask**************************************/
-__kernel void arithm_s_bitwise_or_C1_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.x, src2.x, src2.x);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data | src2_data;
-
- data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_C1_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.x, src2.x, src2.x);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data | src2_data;
-
- data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_or_C1_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort2 src2_data = (ushort2)(src2.x, src2.x);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data | src2_data;
-
- data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
- data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C1_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- short2 src2_data = (short2)(src2.x, src2.x);
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
-
- short2 tmp_data = src1_data | src2_data;
-
- data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
- data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C1_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = src2.x;
-
- int data = src_data1 | src_data2;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C1_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- char4 src_data1 = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 src_data2 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
-
- char4 data = src_data1 | src_data2;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C1_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- short4 src1_data = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 src2_data = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
-
- short4 tmp_data = src1_data | src2_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = tmp_data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_or_C2_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.y, src2.x, src2.y);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data | src2_data;
-
- data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
- data.zw = (dst_index + 2 < dst_end ) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_C2_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.y, src2.x, src2.y);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data | src2_data;
-
- data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
- data.zw = (dst_index + 2 < dst_end ) ? tmp_data.zw : data.zw;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_or_C2_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- ushort2 src_data2 = (ushort2)(src2.x, src2.y);
-
- ushort2 data = src_data1 | src_data2;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C2_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- short2 src_data2 = (short2)(src2.x, src2.y);
-
- short2 data = src_data1 | src_data2;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
-
- int2 data = src_data1 | src_data2;
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C2_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src_data2 = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
-
- char8 data = src_data1 | src_data2;
- *((__global char8 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C2_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- short8 src1_data = *((__global short8 *)((__global char *)src1 + src1_index));
- short8 src2_data = (short8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
-
- short8 tmp_data = src1_data & src2_data;
-
- *((__global short8 *)((__global char *)dst + dst_index)) = tmp_data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_or_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = (uchar4)(src2.x, src2.y, src2.z, src2.x);
- uchar4 src2_data_1 = (uchar4)(src2.y, src2.z, src2.x, src2.y);
- uchar4 src2_data_2 = (uchar4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 | src2_data_0 ;
- uchar4 tmp_data_1 = src1_data_1 | src2_data_1 ;
- uchar4 tmp_data_2 = src1_data_2 | src2_data_2 ;
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = (char4)(src2.x, src2.y, src2.z, src2.x);
- char4 src2_data_1 = (char4)(src2.y, src2.z, src2.x, src2.y);
- char4 src2_data_2 = (char4)(src2.z, src2.x, src2.y, src2.z);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 | src2_data_0;
- char4 tmp_data_1 = src1_data_1 | src2_data_1;
- char4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_s_bitwise_or_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = (ushort2)(src2.x, src2.y);
- ushort2 src2_data_1 = (ushort2)(src2.z, src2.x);
- ushort2 src2_data_2 = (ushort2)(src2.y, src2.z);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 | src2_data_0 ;
- ushort2 tmp_data_1 = src1_data_1 | src2_data_1 ;
- ushort2 tmp_data_2 = src1_data_2 | src2_data_2 ;
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_or_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = (short2)(src2.x, src2.y);
- short2 src2_data_1 = (short2)(src2.z, src2.x);
- short2 src2_data_2 = (short2)(src2.y, src2.z);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 | src2_data_0 ;
- short2 tmp_data_1 = src1_data_1 | src2_data_1 ;
- short2 tmp_data_2 = src1_data_2 | src2_data_2 ;
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_or_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 | src2_data_0;
- int tmp_data_1 = src1_data_1 | src2_data_1;
- int tmp_data_2 = src1_data_2 | src2_data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-__kernel void arithm_s_bitwise_or_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
-
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 src2_data_1 = (char4)(src2.s4, src2.s5, src2.s6, src2.s7);
- char4 src2_data_2 = (char4)(src2.s8, src2.s9, src2.sA, src2.sB);
-
- char4 tmp_data_0 = src1_data_0 | src2_data_0;
- char4 tmp_data_1 = src1_data_1 | src2_data_1;
- char4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C3_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0 ));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8 ));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
-
- short4 data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0 ));
- short4 data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8 ));
- short4 data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
-
- short4 tmp_data_0 = src1_data_0 | src2_data_0;
- short4 tmp_data_1 = src1_data_1 | src2_data_1;
- short4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_or_C4_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
-
- uchar4 data = src_data1 | src2;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_C4_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- char4 src_data1 = *((__global char4 *)(src1 + src1_index));
-
- char4 data = src_data1 | src2;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_or_C4_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
-
- ushort4 data = src_data1 | src2;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C4_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
-
- short4 data = src_data1 | src2;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C4_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
-
- int4 data = src_data1 | src2;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_C4_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- char16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- char16 src_data1 = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src_data2 = (char16)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7,
- src2.s8, src2.s9, src2.sa, src2.sb, src2.sc, src2.sd, src2.se, src2.sf);
-
- char16 data = src_data1 | src_data2;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_C4_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- short16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
- short4 src1_data_3 = *((__global short4 *)((__global char *)src1 + src1_index + 24));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
- short4 src2_data_3 = (short4)(src2.sc, src2.sd, src2.se, src2.sf);
-
- short4 tmp_data_0 = src1_data_0 | src2_data_0;
- short4 tmp_data_1 = src1_data_1 | src2_data_1;
- short4 tmp_data_2 = src1_data_2 | src2_data_2;
- short4 tmp_data_3 = src1_data_3 | src2_data_3;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3;
-
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other GpuMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_or with scalar with mask**************************************/
-__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.x, src2.x, src2.x);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data | src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.x, src2.x, src2.x);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data | src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort2 src2_data = (ushort2)(src2.x, src2.x);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data | src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- short2 src2_data = (short2)(src2.x, src2.x);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data | src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = src2.x;
- int dst_data = *((__global int *)((__global char *)dst + dst_index));
-
- int data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src_data1 = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 src_data2 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 dst_data = *((__global char4 *)((__global char *)dst + dst_index));
-
- char4 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src1_data = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 src2_data = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- short4 data = src1_data | src2_data;
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = (uchar4)(src2.x, src2.y, src2.x, src2.y);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data | src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = (char4)(src2.x, src2.y, src2.x, src2.y);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data | src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- ushort2 src_data2 = (ushort2)(src2.x, src2.y);
- ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
-
- ushort2 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- short2 src_data2 = (short2)(src2.x, src2.y);
- short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
-
- short2 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
-
- int2 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src_data2 = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
- char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
- char8 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
- *((__global char8 *)((__global char *)dst + dst_index)) = data;
-
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short8 src1_data = *((__global short8 *)((__global char *)src1 + src1_index));
- short8 src2_data = (short8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7);
- short8 dst_data = *((__global short8 *)((__global char *)dst + dst_index));
-
- short8 data = src1_data | src2_data;
- data = mask_data ? data : dst_data;
-
- *((__global short8 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = (uchar4)(src2.x, src2.y, src2.z, src2.x);
- uchar4 src2_data_1 = (uchar4)(src2.y, src2.z, src2.x, src2.y);
- uchar4 src2_data_2 = (uchar4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 | src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 | src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = (char4)(src2.x, src2.y, src2.z, src2.x);
- char4 src2_data_1 = (char4)(src2.y, src2.z, src2.x, src2.y);
- char4 src2_data_2 = (char4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 | src2_data_0;
- char4 tmp_data_1 = src1_data_1 | src2_data_1;
- char4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = (ushort2)(src2.x, src2.y);
- ushort2 src2_data_1 = (ushort2)(src2.z, src2.x);
- ushort2 src2_data_2 = (ushort2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 | src2_data_0;
- ushort2 tmp_data_1 = src1_data_1 | src2_data_1;
- ushort2 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = (short2)(src2.x, src2.y);
- short2 src2_data_1 = (short2)(src2.z, src2.x);
- short2 src2_data_2 = (short2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 | src2_data_0;
- short2 tmp_data_1 = src1_data_1 | src2_data_1;
- short2 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 | src2_data_0;
- int tmp_data_1 = src1_data_1 | src2_data_1;
- int tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = (char4)(src2.s0, src2.s1, src2.s2, src2.s3);
- char4 src2_data_1 = (char4)(src2.s4, src2.s5, src2.s6, src2.s7);
- char4 src2_data_2 = (char4)(src2.s8, src2.s9, src2.sA, src2.sB);
-
- uchar mask_data = * (mask + mask_index);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 | src2_data_0;
- char4 tmp_data_1 = src1_data_1 | src2_data_1;
- char4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
-
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0 ));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8 ));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
-
- uchar mask_data = * (mask + mask_index);
-
- short4 data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0 ));
- short4 data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8 ));
- short4 data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
-
- short4 tmp_data_0 = src1_data_0 | src2_data_0;
- short4 tmp_data_1 = src1_data_1 | src2_data_1;
- short4 tmp_data_2 = src1_data_2 | src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- uchar4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
-
- uchar4 data = src_data1 | src2;
- data = mask_data ? data : dst_data;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src_data1 = *((__global char4 *)(src1 + src1_index));
- char4 dst_data = *((__global char4 *)(dst + dst_index));
-
- char4 data = src_data1 | src2;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- ushort4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
-
- ushort4 data = src_data1 | src2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- short4 data = src_data1 | src2;
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
-
- int4 data = src_data1 | src2;
- data = mask_data ? data : dst_data;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- char16 src2, int rows, int cols, int dst_step1)
-
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char16 src_data1 = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src_data2 = (char16)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7,
- src2.s8, src2.s9, src2.sa, src2.sb, src2.sc, src2.sd, src2.se, src2.sf);
- char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
-
- char16 data = src_data1 | src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- short16 src2, int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src1_data_0 = *((__global short4 *)((__global char *)src1 + src1_index + 0));
- short4 src1_data_1 = *((__global short4 *)((__global char *)src1 + src1_index + 8));
- short4 src1_data_2 = *((__global short4 *)((__global char *)src1 + src1_index + 16));
- short4 src1_data_3 = *((__global short4 *)((__global char *)src1 + src1_index + 24));
-
- short4 src2_data_0 = (short4)(src2.s0, src2.s1, src2.s2, src2.s3);
- short4 src2_data_1 = (short4)(src2.s4, src2.s5, src2.s6, src2.s7);
- short4 src2_data_2 = (short4)(src2.s8, src2.s9, src2.sa, src2.sb);
- short4 src2_data_3 = (short4)(src2.sc, src2.sd, src2.se, src2.sf);
-
- short4 dst_data_0 = *((__global short4 *)((__global char *)dst + dst_index + 0));
- short4 dst_data_1 = *((__global short4 *)((__global char *)dst + dst_index + 8));
- short4 dst_data_2 = *((__global short4 *)((__global char *)dst + dst_index + 16));
- short4 dst_data_3 = *((__global short4 *)((__global char *)dst + dst_index + 24));
-
- short4 data_0 = src1_data_0 | src2_data_0;
- short4 data_1 = src1_data_1 | src2_data_1;
- short4 data_2 = src1_data_2 | src2_data_2;
- short4 data_3 = src1_data_3 | src2_data_3;
-
- data_0 = mask_data ? data_0 : dst_data_0;
- data_1 = mask_data ? data_1 : dst_data_1;
- data_2 = mask_data ? data_2 : dst_data_2;
- data_3 = mask_data ? data_3 : dst_data_3;
-
- *((__global short4 *)((__global char *)dst + dst_index + 0)) = data_0;
- *((__global short4 *)((__global char *)dst + dst_index + 8)) = data_1;
- *((__global short4 *)((__global char *)dst + dst_index + 16)) = data_2;
- *((__global short4 *)((__global char *)dst + dst_index + 24)) = data_3;
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other GpuMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_xor without mask**************************************/
-__kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- uchar4 src1_data = vload4(0, src1 + src1_index_fix);
- uchar4 src2_data = vload4(0, src2 + src2_index_fix);
-
- if(src1_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- uchar4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data ^ src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = dst_data;
- }
-}
-
-
-__kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- char4 src1_data = vload4(0, src1 + src1_index_fix);
- char4 src2_data = vload4(0, src2 + src2_index_fix);
-
- if(src1_index < 0)
- {
- char4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- char4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
- char4 dst_data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global char4 *)(dst + dst_index)) = dst_data;
- }
-}
-
-
-__kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 3)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix));
- ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
-
- if(src1_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- ushort4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- ushort4 tmp_data = src1_data ^ src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = dst_data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 3)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
- int src1_index_fix = src1_index < 0 ? 0 : src1_index;
- int src2_index_fix = src2_index < 0 ? 0 : src2_index;
- short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix));
- short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
-
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- if(src1_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
- src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
- }
- if(src2_index < 0)
- {
- short4 tmp;
- tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
- src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
- }
-
-
-
- short4 tmp_data = src1_data ^ src2_data;
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = dst_data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_xor_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- int data1 = *((__global int *)((__global char *)src1 + src1_index));
- int data2 = *((__global int *)((__global char *)src2 + src2_index));
- int tmp = data1 ^ data2;
-
- *((__global int *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-
-__kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- char4 data1 = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 data2 = *((__global char4 *)((__global char *)src2 + src2_index));
- char4 tmp = data1 ^ data2;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- char8 data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 data2 = *((__global char8 *)((__global char *)src2 + src2_index));
-
- *((__global char8 *)((__global char *)dst + dst_index)) = data1 ^ data2;
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************bitwise_xor with mask**************************************/
-__kernel void arithm_bitwise_xor_with_mask_C1_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data ^ src2_data;
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_xor_with_mask_C1_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = vload4(0, src2 + src2_index);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
-
- data.x = convert_char((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = convert_char((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = convert_char((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = convert_char((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_xor_with_mask_C1_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort2 src2_data = vload2(0, (__global ushort *)((__global char *)src2 + src2_index));
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- ushort2 tmp_data = src1_data ^ src2_data;
-
- data.x = convert_ushort((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = convert_ushort((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_xor_with_mask_C1_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- short2 tmp_data = src1_data ^ src2_data;
-
- data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_xor_with_mask_C1_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = *((__global int *)((__global char *)src2 + src2_index));
- int dst_data = *((__global int *)((__global char *)dst + dst_index));
-
- int data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-
-
-__kernel void arithm_bitwise_xor_with_mask_C1_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src_data1 = *((__global char4 *)((__global char *)src1 + src1_index));
- char4 src_data2 = *((__global char4 *)((__global char *)src2 + src2_index));
- char4 dst_data = *((__global char4 *)((__global char *)dst + dst_index));
-
- char4 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C1_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src_data2 = *((__global char8 *)((__global char *)src2 + src2_index));
- char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
-
- char8 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char8 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-
-
-
-__kernel void arithm_bitwise_xor_with_mask_C2_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- uchar4 tmp_data = src1_data ^ src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_bitwise_xor_with_mask_C2_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- char4 src1_data = vload4(0, src1 + src1_index);
- char4 src2_data = vload4(0, src2 + src2_index);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- char4 data = *((__global char4 *)(dst + dst_index));
- char4 tmp_data = src1_data ^ src2_data;
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_bitwise_xor_with_mask_C2_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- ushort2 src_data2 = *((__global ushort2 *)((__global char *)src2 + src2_index));
- ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
-
- ushort2 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C2_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- short2 src_data2 = *((__global short2 *)((__global char *)src2 + src2_index));
- short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
-
- short2 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C2_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = *((__global int2 *)((__global char *)src2 + src2_index));
- int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
-
- int2 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C2_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index));
- char8 src_data2 = *((__global char8 *)((__global char *)src2 + src2_index));
- char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index));
-
- char8 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char8 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C2_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char16 src_data1 = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src_data2 = *((__global char16 *)((__global char *)src2 + src2_index));
- char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
-
- char16 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-
-
-__kernel void arithm_bitwise_xor_with_mask_C3_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- uchar4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- uchar4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- uchar4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- uchar4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-
-__kernel void arithm_bitwise_xor_with_mask_C3_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- char4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- char4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- char4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- char4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- char4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- char4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- char4 data_0 = *((__global char4 *)(dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)(dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)(dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global char4 *)(dst + dst_index + 0)) = data_0;
- *((__global char4 *)(dst + dst_index + 4)) = data_1;
- *((__global char4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-
-__kernel void arithm_bitwise_xor_with_mask_C3_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 0));
- ushort2 src2_data_1 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 4));
- ushort2 src2_data_2 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = src1_data_0 ^ src2_data_0 ;
- ushort2 tmp_data_1 = src1_data_1 ^ src2_data_1 ;
- ushort2 tmp_data_2 = src1_data_2 ^ src2_data_2 ;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C3_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
-#ifdef dst_align
-#undef dst_align
-#endif
-#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 0));
- short2 src2_data_1 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 4));
- short2 src2_data_2 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = src1_data_0 ^ src2_data_0 ;
- short2 tmp_data_1 = src1_data_1 ^ src2_data_1 ;
- short2 tmp_data_2 = src1_data_2 ^ src2_data_2 ;
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C3_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = *((__global int *)((__global char *)src2 + src2_index + 0));
- int src2_data_1 = *((__global int *)((__global char *)src2 + src2_index + 4));
- int src2_data_2 = *((__global int *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = src1_data_0 ^ src2_data_0 ;
- int tmp_data_1 = src1_data_1 ^ src2_data_1 ;
- int tmp_data_2 = src1_data_2 ^ src2_data_2 ;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C3_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- char4 src1_data_0 = *((__global char4 *)((__global char *)src1 + src1_index + 0));
- char4 src1_data_1 = *((__global char4 *)((__global char *)src1 + src1_index + 4));
- char4 src1_data_2 = *((__global char4 *)((__global char *)src1 + src1_index + 8));
-
- char4 src2_data_0 = *((__global char4 *)((__global char *)src2 + src2_index + 0));
- char4 src2_data_1 = *((__global char4 *)((__global char *)src2 + src2_index + 4));
- char4 src2_data_2 = *((__global char4 *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- char4 data_0 = *((__global char4 *)((__global char *)dst + dst_index + 0));
- char4 data_1 = *((__global char4 *)((__global char *)dst + dst_index + 4));
- char4 data_2 = *((__global char4 *)((__global char *)dst + dst_index + 8));
-
- char4 tmp_data_0 = src1_data_0 ^ src2_data_0;
- char4 tmp_data_1 = src1_data_1 ^ src2_data_1;
- char4 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C3_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 24) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- char8 src1_data_0 = *((__global char8 *)((__global char *)src1 + src1_index + 0 ));
- char8 src1_data_1 = *((__global char8 *)((__global char *)src1 + src1_index + 8 ));
- char8 src1_data_2 = *((__global char8 *)((__global char *)src1 + src1_index + 16));
-
- char8 src2_data_0 = *((__global char8 *)((__global char *)src2 + src2_index + 0 ));
- char8 src2_data_1 = *((__global char8 *)((__global char *)src2 + src2_index + 8 ));
- char8 src2_data_2 = *((__global char8 *)((__global char *)src2 + src2_index + 16));
-
- uchar mask_data = * (mask + mask_index);
-
- char8 data_0 = *((__global char8 *)((__global char *)dst + dst_index + 0 ));
- char8 data_1 = *((__global char8 *)((__global char *)dst + dst_index + 8 ));
- char8 data_2 = *((__global char8 *)((__global char *)dst + dst_index + 16));
-
- char8 tmp_data_0 = src1_data_0 ^ src2_data_0;
- char8 tmp_data_1 = src1_data_1 ^ src2_data_1;
- char8 tmp_data_2 = src1_data_2 ^ src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-
-
-__kernel void arithm_bitwise_xor_with_mask_C4_D0 (
- __global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 src_data2 = *((__global uchar4 *)(src2 + src2_index));
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
-
- uchar4 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-
-
-__kernel void arithm_bitwise_xor_with_mask_C4_D1 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char4 src_data1 = *((__global char4 *)(src1 + src1_index));
- char4 src_data2 = *((__global char4 *)(src2 + src2_index));
- char4 dst_data = *((__global char4 *)(dst + dst_index));
-
- char4 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char4 *)(dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_bitwise_xor_with_mask_C4_D2 (
- __global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 src_data2 = *((__global ushort4 *)((__global char *)src2 + src2_index));
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
-
- ushort4 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C4_D3 (
- __global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 src_data2 = *((__global short4 *)((__global char *)src2 + src2_index));
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- short4 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C4_D4 (
- __global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 src_data2 = *((__global int4 *)((__global char *)src2 + src2_index));
- int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
-
- int4 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_bitwise_xor_with_mask_C4_D5 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char16 src_data1 = *((__global char16 *)((__global char *)src1 + src1_index));
- char16 src_data2 = *((__global char16 *)((__global char *)src2 + src2_index));
- char16 dst_data = *((__global char16 *)((__global char *)dst + dst_index));
-
- char16 data = src_data1 ^ src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global char16 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_bitwise_xor_with_mask_C4_D6 (
- __global char *src1, int src1_step, int src1_offset,
- __global char *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global char *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 5) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- char8 src_data1_0 = *((__global char8 *)((__global char *)src1 + src1_index + 0));
- char8 src_data1_1 = *((__global char8 *)((__global char *)src1 + src1_index + 8));
- char8 src_data1_2 = *((__global char8 *)((__global char *)src1 + src1_index + 16));
- char8 src_data1_3 = *((__global char8 *)((__global char *)src1 + src1_index + 24));
-
- char8 src_data2_0 = *((__global char8 *)((__global char *)src2 + src2_index + 0));
- char8 src_data2_1 = *((__global char8 *)((__global char *)src2 + src2_index + 8));
- char8 src_data2_2 = *((__global char8 *)((__global char *)src2 + src2_index + 16));
- char8 src_data2_3 = *((__global char8 *)((__global char *)src2 + src2_index + 24));
-
- char8 dst_data_0 = *((__global char8 *)((__global char *)dst + dst_index + 0));
- char8 dst_data_1 = *((__global char8 *)((__global char *)dst + dst_index + 8));
- char8 dst_data_2 = *((__global char8 *)((__global char *)dst + dst_index + 16));
- char8 dst_data_3 = *((__global char8 *)((__global char *)dst + dst_index + 24));
-
- char8 data_0 = src_data1_0 ^ src_data2_0;
- char8 data_1 = src_data1_1 ^ src_data2_1;
- char8 data_2 = src_data1_2 ^ src_data2_2;
- char8 data_3 = src_data1_3 ^ src_data2_3;
-
- data_0 = mask_data ? data_0 : dst_data_0;
- data_1 = mask_data ? data_1 : dst_data_1;
- data_2 = mask_data ? data_2 : dst_data_2;
- data_3 = mask_data ? data_3 : dst_data_3;
-
- *((__global char8 *)((__global char *)dst + dst_index + 0)) = data_0;
- *((__global char8 *)((__global char *)dst + dst_index + 8)) = data_1;
- *((__global char8 *)((__global char *)dst + dst_index + 16)) = data_2;
- *((__global char8 *)((__global char *)dst + dst_index + 24)) = data_3;
- }
-}
-#endif
if (x < thread_cols && y < rows)
{
int src_index_0 = mad24(y, src_step, (x) + src_offset);
- int src_index_1 = mad24(y, src_step, (cols - x -1) + src_offset);
-
- int dst_index_0 = mad24(y, dst_step, (x) + dst_offset);
int dst_index_1 = mad24(y, dst_step, (cols - x -1) + dst_offset);
-
uchar data0 = *(src + src_index_0);
- uchar data1 = *(src + src_index_1);
+ *(dst + dst_index_1) = data0;
+ int src_index_1 = mad24(y, src_step, (cols - x -1) + src_offset);
+ int dst_index_0 = mad24(y, dst_step, (x) + dst_offset);
+ uchar data1 = *(src + src_index_1);
*(dst + dst_index_0) = data1;
- *(dst + dst_index_1) = data0;
}
}
__kernel void arithm_flip_cols_C1_D1 (__global char *src, int src_step, int src_offset,
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other GpuMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////SUB////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************sub without mask**************************************/
-__kernel void arithm_sub_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
-
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
- short4 tmp = convert_short4_sat(src1_data) - convert_short4_sat(src2_data);
- uchar4 tmp_data = convert_uchar4_sat(tmp);
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = dst_data;
- }
-}
-__kernel void arithm_sub_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
- ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
-
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) - convert_int4_sat(src2_data);
- ushort4 tmp_data = convert_ushort4_sat(tmp);
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = dst_data;
- }
-}
-__kernel void arithm_sub_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align ((dst_offset >> 1) & 3)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
-
- short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
- short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
-
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) - convert_int4_sat(src2_data);
- short4 tmp_data = convert_short4_sat(tmp);
-
- dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
- dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
- dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
- dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = dst_data;
- }
-}
-
-__kernel void arithm_sub_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- int data1 = *((__global int *)((__global char *)src1 + src1_index));
- int data2 = *((__global int *)((__global char *)src2 + src2_index));
- long tmp = (long)(data1) - (long)(data2);
-
- *((__global int *)((__global char *)dst + dst_index)) = convert_int_sat(tmp);
- }
-}
-__kernel void arithm_sub_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- float data1 = *((__global float *)((__global char *)src1 + src1_index));
- float data2 = *((__global float *)((__global char *)src2 + src2_index));
- float tmp = data1 - data2;
-
- *((__global float *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_sub_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global double *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- double data1 = *((__global double *)((__global char *)src1 + src1_index));
- double data2 = *((__global double *)((__global char *)src2 + src2_index));
-
- *((__global double *)((__global char *)dst + dst_index)) = data1 - data2;
- }
-}
-#endif
-
-/**************************************sub with mask**************************************/
-__kernel void arithm_sub_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- short4 tmp = convert_short4_sat(src1_data) - convert_short4_sat(src2_data);
- uchar4 tmp_data = convert_uchar4_sat(tmp);
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- ushort2 src2_data = vload2(0, (__global ushort *)((__global char *)src2 + src2_index));
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) - convert_int2_sat(src2_data);
- ushort2 tmp_data = convert_ushort2_sat(tmp);
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index));
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) - convert_int2_sat(src2_data);
- short2 tmp_data = convert_short2_sat(tmp);
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = *((__global int *)((__global char *)src2 + src2_index));
- int dst_data = *((__global int *)((__global char *)dst + dst_index));
-
- int data = convert_int_sat((long)src_data1 - (long)src_data2);
- data = mask_data ? data : dst_data;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_sub_with_mask_C1_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- float src_data1 = *((__global float *)((__global char *)src1 + src1_index));
- float src_data2 = *((__global float *)((__global char *)src2 + src2_index));
- float dst_data = *((__global float *)((__global char *)dst + dst_index));
-
- float data = src_data1 - src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global float *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_sub_with_mask_C1_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global double *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- double src_data1 = *((__global double *)((__global char *)src1 + src1_index));
- double src_data2 = *((__global double *)((__global char *)src2 + src2_index));
- double dst_data = *((__global double *)((__global char *)dst + dst_index));
-
- double data = src_data1 - src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global double *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-
-__kernel void arithm_sub_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- short4 tmp = convert_short4_sat(src1_data) - convert_short4_sat(src2_data);
- uchar4 tmp_data = convert_uchar4_sat(tmp);
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- ushort2 src_data2 = *((__global ushort2 *)((__global char *)src2 + src2_index));
- ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
-
- int2 tmp = convert_int2_sat(src_data1) - convert_int2_sat(src_data2);
- ushort2 data = convert_ushort2_sat(tmp);
- data = mask_data ? data : dst_data;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- short2 src_data2 = *((__global short2 *)((__global char *)src2 + src2_index));
- short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
-
- int2 tmp = convert_int2_sat(src_data1) - convert_int2_sat(src_data2);
- short2 data = convert_short2_sat(tmp);
- data = mask_data ? data : dst_data;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = *((__global int2 *)((__global char *)src2 + src2_index));
- int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
-
- int2 data = convert_int2_sat(convert_long2_sat(src_data1) - convert_long2_sat(src_data2));
- data = mask_data ? data : dst_data;
-
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C2_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- float2 src_data1 = *((__global float2 *)((__global char *)src1 + src1_index));
- float2 src_data2 = *((__global float2 *)((__global char *)src2 + src2_index));
- float2 dst_data = *((__global float2 *)((__global char *)dst + dst_index));
-
- float2 data = src_data1 - src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global float2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_sub_with_mask_C2_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global double *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- double2 src_data1 = *((__global double2 *)((__global char *)src1 + src1_index));
- double2 src_data2 = *((__global double2 *)((__global char *)src2 + src2_index));
- double2 dst_data = *((__global double2 *)((__global char *)dst + dst_index));
-
- double2 data = src_data1 - src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global double2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_sub_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- uchar4 src2_data_0 = vload4(0, src2 + src2_index + 0);
- uchar4 src2_data_1 = vload4(0, src2 + src2_index + 4);
- uchar4 src2_data_2 = vload4(0, src2 + src2_index + 8);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- uchar4 tmp_data_0 = convert_uchar4_sat(convert_short4_sat(src1_data_0) - convert_short4_sat(src2_data_0));
- uchar4 tmp_data_1 = convert_uchar4_sat(convert_short4_sat(src1_data_1) - convert_short4_sat(src2_data_1));
- uchar4 tmp_data_2 = convert_uchar4_sat(convert_short4_sat(src1_data_2) - convert_short4_sat(src2_data_2));
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-__kernel void arithm_sub_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- ushort2 src2_data_0 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 0));
- ushort2 src2_data_1 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 4));
- ushort2 src2_data_2 = vload2(0, (__global ushort *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- ushort2 tmp_data_0 = convert_ushort2_sat(convert_int2_sat(src1_data_0) - convert_int2_sat(src2_data_0));
- ushort2 tmp_data_1 = convert_ushort2_sat(convert_int2_sat(src1_data_1) - convert_int2_sat(src2_data_1));
- ushort2 tmp_data_2 = convert_ushort2_sat(convert_int2_sat(src1_data_2) - convert_int2_sat(src2_data_2));
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_sub_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- short2 src2_data_0 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 0));
- short2 src2_data_1 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 4));
- short2 src2_data_2 = vload2(0, (__global short *)((__global char *)src2 + src2_index + 8));
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- short2 tmp_data_0 = convert_short2_sat(convert_int2_sat(src1_data_0) - convert_int2_sat(src2_data_0));
- short2 tmp_data_1 = convert_short2_sat(convert_int2_sat(src1_data_1) - convert_int2_sat(src2_data_1));
- short2 tmp_data_2 = convert_short2_sat(convert_int2_sat(src1_data_2) - convert_int2_sat(src2_data_2));
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_sub_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = *((__global int *)((__global char *)src2 + src2_index + 0));
- int src2_data_1 = *((__global int *)((__global char *)src2 + src2_index + 4));
- int src2_data_2 = *((__global int *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- int tmp_data_0 = convert_int_sat((long)src1_data_0 - (long)src2_data_0);
- int tmp_data_1 = convert_int_sat((long)src1_data_1 - (long)src2_data_1);
- int tmp_data_2 = convert_int_sat((long)src1_data_2 - (long)src2_data_2);
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_sub_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 12) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- float src1_data_0 = *((__global float *)((__global char *)src1 + src1_index + 0));
- float src1_data_1 = *((__global float *)((__global char *)src1 + src1_index + 4));
- float src1_data_2 = *((__global float *)((__global char *)src1 + src1_index + 8));
-
- float src2_data_0 = *((__global float *)((__global char *)src2 + src2_index + 0));
- float src2_data_1 = *((__global float *)((__global char *)src2 + src2_index + 4));
- float src2_data_2 = *((__global float *)((__global char *)src2 + src2_index + 8));
-
- uchar mask_data = * (mask + mask_index);
-
- float data_0 = *((__global float *)((__global char *)dst + dst_index + 0));
- float data_1 = *((__global float *)((__global char *)dst + dst_index + 4));
- float data_2 = *((__global float *)((__global char *)dst + dst_index + 8));
-
- float tmp_data_0 = src1_data_0 - src2_data_0;
- float tmp_data_1 = src1_data_1 - src2_data_1;
- float tmp_data_2 = src1_data_2 - src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_sub_with_mask_C3_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global double *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int src2_index = mad24(y, src2_step, (x * 24) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- double src1_data_0 = *((__global double *)((__global char *)src1 + src1_index + 0 ));
- double src1_data_1 = *((__global double *)((__global char *)src1 + src1_index + 8 ));
- double src1_data_2 = *((__global double *)((__global char *)src1 + src1_index + 16));
-
- double src2_data_0 = *((__global double *)((__global char *)src2 + src2_index + 0 ));
- double src2_data_1 = *((__global double *)((__global char *)src2 + src2_index + 8 ));
- double src2_data_2 = *((__global double *)((__global char *)src2 + src2_index + 16));
-
- uchar mask_data = * (mask + mask_index);
-
- double data_0 = *((__global double *)((__global char *)dst + dst_index + 0 ));
- double data_1 = *((__global double *)((__global char *)dst + dst_index + 8 ));
- double data_2 = *((__global double *)((__global char *)dst + dst_index + 16));
-
- double tmp_data_0 = src1_data_0 - src2_data_0;
- double tmp_data_1 = src1_data_1 - src2_data_1;
- double tmp_data_2 = src1_data_2 - src2_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-__kernel void arithm_sub_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 src_data2 = *((__global uchar4 *)(src2 + src2_index));
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
-
- uchar4 data = convert_uchar4_sat(convert_short4_sat(src_data1) - convert_short4_sat(src_data2));
- data = mask_data ? data : dst_data;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 src_data2 = *((__global ushort4 *)((__global char *)src2 + src2_index));
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
-
- ushort4 data = convert_ushort4_sat(convert_int4_sat(src_data1) - convert_int4_sat(src_data2));
- data = mask_data ? data : dst_data;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global short *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 src_data2 = *((__global short4 *)((__global char *)src2 + src2_index));
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- short4 data = convert_short4_sat(convert_int4_sat(src_data1) - convert_int4_sat(src_data2));
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global int *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 src_data2 = *((__global int4 *)((__global char *)src2 + src2_index));
- int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
-
- int4 data = convert_int4_sat(convert_long4_sat(src_data1) - convert_long4_sat(src_data2));
- data = mask_data ? data : dst_data;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_sub_with_mask_C4_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 4) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- float4 src_data1 = *((__global float4 *)((__global char *)src1 + src1_index));
- float4 src_data2 = *((__global float4 *)((__global char *)src2 + src2_index));
- float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
-
- float4 data = src_data1 - src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global float4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_sub_with_mask_C4_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *src2, int src2_step, int src2_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- __global double *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int src2_index = mad24(y, src2_step, (x << 5) + src2_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- double4 src_data1 = *((__global double4 *)((__global char *)src1 + src1_index));
- double4 src_data2 = *((__global double4 *)((__global char *)src2 + src2_index));
- double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
-
- double4 data = src_data1 - src_data2;
- data = mask_data ? data : dst_data;
-
- *((__global double4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-/**************************************sub with scalar without mask**************************************/
-__kernel void arithm_s_sub_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- uchar4 tmp_data = convert_uchar4_sat(tmp);
-
- data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- int2 src2_data = (int2)(src2.x, src2.x);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- ushort2 tmp_data = convert_ushort2_sat(tmp);
-
- data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
- data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- int2 src2_data = (int2)(src2.x, src2.x);
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
-
- int2 tmp = convert_int2_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- short2 tmp_data = convert_short2_sat(tmp);
-
- data.x = (dst_index + 0 >= dst_start) ? tmp_data.x : data.x;
- data.y = (dst_index + 2 < dst_end ) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = src2.x;
-
- long tmp = (long)src_data1 - (long)src_data2;
- tmp = isMatSubScalar ? tmp : -tmp;
- int data = convert_int_sat(tmp);
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C1_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- float src_data1 = *((__global float *)((__global char *)src1 + src1_index));
- float src_data2 = src2.x;
-
- float tmp = src_data1 - src_data2;
- tmp = isMatSubScalar ? tmp : -tmp;
-
- *((__global float *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_C1_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- double src_data1 = *((__global double *)((__global char *)src1 + src1_index));
- double src2_data = src2.x;
-
- double data = src_data1 - src2_data;
- data = isMatSubScalar ? data : -data;
-
- *((__global double *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-
-__kernel void arithm_s_sub_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- int4 src2_data = (int4)(src2.x, src2.y, src2.x, src2.y);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- uchar4 tmp_data = convert_uchar4_sat(tmp);
-
- data.xy = (dst_index + 0 >= dst_start) ? tmp_data.xy : data.xy;
- data.zw = (dst_index + 2 < dst_end ) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
-
- int2 tmp = convert_int2_sat(src_data1) - src_data2;
- tmp = isMatSubScalar ? tmp : -tmp;
- ushort2 data = convert_ushort2_sat(tmp);
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
-
- int2 tmp = convert_int2_sat(src_data1) - src_data2;
- tmp = isMatSubScalar ? tmp : -tmp;
- short2 data = convert_short2_sat(tmp);
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
-
- long2 tmp = convert_long2_sat(src_data1) - convert_long2_sat(src_data2);
- tmp = isMatSubScalar ? tmp : -tmp;
- int2 data = convert_int2_sat(tmp);
-
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C2_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- float2 src_data1 = *((__global float2 *)((__global char *)src1 + src1_index));
- float2 src_data2 = (float2)(src2.x, src2.y);
- float2 dst_data = *((__global float2 *)((__global char *)dst + dst_index));
-
- float2 tmp = src_data1 - src_data2;
- tmp = isMatSubScalar ? tmp : -tmp;
-
- *((__global float2 *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_C2_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- double2 src_data1 = *((__global double2 *)((__global char *)src1 + src1_index));
- double2 src_data2 = (double2)(src2.x, src2.y);
- double2 dst_data = *((__global double2 *)((__global char *)dst + dst_index));
-
- double2 data = src_data1 - src_data2;
- data = isMatSubScalar ? data : -data;
-
- *((__global double2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_s_sub_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- int4 src2_data_0 = (int4)(src2.x, src2.y, src2.z, src2.x);
- int4 src2_data_1 = (int4)(src2.y, src2.z, src2.x, src2.y);
- int4 src2_data_2 = (int4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- int4 tmp_0 = convert_int4_sat(src1_data_0) - src2_data_0;
- int4 tmp_1 = convert_int4_sat(src1_data_1) - src2_data_1;
- int4 tmp_2 = convert_int4_sat(src1_data_2) - src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- uchar4 tmp_data_0 = convert_uchar4_sat(tmp_0);
- uchar4 tmp_data_1 = convert_uchar4_sat(tmp_1);
- uchar4 tmp_data_2 = convert_uchar4_sat(tmp_2);
-
- data_0.xyz = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-__kernel void arithm_s_sub_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- int2 tmp_0 = convert_int2_sat(src1_data_0) - src2_data_0;
- int2 tmp_1 = convert_int2_sat(src1_data_1) - src2_data_1;
- int2 tmp_2 = convert_int2_sat(src1_data_2) - src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- ushort2 tmp_data_0 = convert_ushort2_sat(tmp_0);
- ushort2 tmp_data_1 = convert_ushort2_sat(tmp_1);
- ushort2 tmp_data_2 = convert_ushort2_sat(tmp_2);
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_sub_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- int2 tmp_0 = convert_int2_sat(src1_data_0) - src2_data_0;
- int2 tmp_1 = convert_int2_sat(src1_data_1) - src2_data_1;
- int2 tmp_2 = convert_int2_sat(src1_data_2) - src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- short2 tmp_data_0 = convert_short2_sat(tmp_0);
- short2 tmp_data_1 = convert_short2_sat(tmp_1);
- short2 tmp_data_2 = convert_short2_sat(tmp_2);
-
- data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_sub_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- long tmp_0 = (long)src1_data_0 - (long)src2_data_0;
- long tmp_1 = (long)src1_data_1 - (long)src2_data_1;
- long tmp_2 = (long)src1_data_2 - (long)src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- int tmp_data_0 = convert_int_sat(tmp_0);
- int tmp_data_1 = convert_int_sat(tmp_1);
- int tmp_data_2 = convert_int_sat(tmp_2);
-
- *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2;
- }
-}
-__kernel void arithm_s_sub_C3_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- float src1_data_0 = *((__global float *)((__global char *)src1 + src1_index + 0));
- float src1_data_1 = *((__global float *)((__global char *)src1 + src1_index + 4));
- float src1_data_2 = *((__global float *)((__global char *)src1 + src1_index + 8));
-
- float src2_data_0 = src2.x;
- float src2_data_1 = src2.y;
- float src2_data_2 = src2.z;
-
- float data_0 = *((__global float *)((__global char *)dst + dst_index + 0));
- float data_1 = *((__global float *)((__global char *)dst + dst_index + 4));
- float data_2 = *((__global float *)((__global char *)dst + dst_index + 8));
-
- float tmp_0 = src1_data_0 - src2_data_0;
- float tmp_1 = src1_data_1 - src2_data_1;
- float tmp_2 = src1_data_2 - src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- *((__global float *)((__global char *)dst + dst_index + 0))= tmp_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= tmp_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= tmp_2;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_C3_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- double src1_data_0 = *((__global double *)((__global char *)src1 + src1_index + 0 ));
- double src1_data_1 = *((__global double *)((__global char *)src1 + src1_index + 8 ));
- double src1_data_2 = *((__global double *)((__global char *)src1 + src1_index + 16));
-
- double src2_data_0 = src2.x;
- double src2_data_1 = src2.y;
- double src2_data_2 = src2.z;
-
- double data_0 = *((__global double *)((__global char *)dst + dst_index + 0 ));
- double data_1 = *((__global double *)((__global char *)dst + dst_index + 8 ));
- double data_2 = *((__global double *)((__global char *)dst + dst_index + 16));
-
- double tmp_data_0 = src1_data_0 - src2_data_0;
- double tmp_data_1 = src1_data_1 - src2_data_1;
- double tmp_data_2 = src1_data_2 - src2_data_2;
-
- tmp_data_0 = isMatSubScalar ? tmp_data_0 : -tmp_data_0;
- tmp_data_1 = isMatSubScalar ? tmp_data_1 : -tmp_data_1;
- tmp_data_2 = isMatSubScalar ? tmp_data_2 : -tmp_data_2;
-
- *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2;
- }
-}
-#endif
-__kernel void arithm_s_sub_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
-
- int4 tmp = convert_int4_sat(src_data1) - src2;
- tmp = isMatSubScalar ? tmp : -tmp;
- uchar4 data = convert_uchar4_sat(tmp);
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
-
- int4 tmp = convert_int4_sat(src_data1) - src2;
- tmp = isMatSubScalar ? tmp : -tmp;
- ushort4 data = convert_ushort4_sat(tmp);
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
-
- int4 tmp = convert_int4_sat(src_data1) - src2;
- tmp = isMatSubScalar ? tmp : -tmp;
- short4 data = convert_short4_sat(tmp);
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
-
- long4 tmp = convert_long4_sat(src_data1) - convert_long4_sat(src2);
- tmp = isMatSubScalar ? tmp : -tmp;
- int4 data = convert_int4_sat(tmp);
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_C4_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- float4 src_data1 = *((__global float4 *)((__global char *)src1 + src1_index));
-
- float4 tmp = src_data1 - src2;
- tmp = isMatSubScalar ? tmp : -tmp;
-
- *((__global float4 *)((__global char *)dst + dst_index)) = tmp;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_C4_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- double4 src_data1 = *((__global double4 *)((__global char *)src1 + src1_index));
-
- double4 data = src_data1 - src2;
- data = isMatSubScalar ? data : -data;
-
- *((__global double4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Jia Haipeng, jiahaipeng95@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other GpuMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-
-/**************************************sub with scalar with mask**************************************/
-__kernel void arithm_s_sub_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align (dst_offset & 3)
- int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- uchar4 tmp_data = convert_uchar4_sat(tmp);
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : data.y;
- data.z = ((mask_data.z) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : data.z;
- data.w = ((mask_data.w) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : data.w;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- ushort2 src1_data = vload2(0, (__global ushort *)((__global char *)src1 + src1_index));
- int2 src2_data = (int2)(src2.x, src2.x);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data = *((__global ushort2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- ushort2 tmp_data = convert_ushort2_sat(tmp);
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- short2 src1_data = vload2(0, (__global short *)((__global char *)src1 + src1_index));
- int2 src2_data = (int2)(src2.x, src2.x);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data = *((__global short2 *)((__global uchar *)dst + dst_index));
- int2 tmp = convert_int2_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- short2 tmp_data = convert_short2_sat(tmp);
-
- data.x = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.x : data.x;
- data.y = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.y : data.y;
-
- *((__global short2 *)((__global uchar *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int src_data1 = *((__global int *)((__global char *)src1 + src1_index));
- int src_data2 = src2.x;
- int dst_data = *((__global int *)((__global char *)dst + dst_index));
-
- long tmp = (long)src_data1 - (long)src_data2;
- tmp = isMatSubScalar ? tmp : - tmp;
- int data = convert_int_sat(tmp);
- data = mask_data ? data : dst_data;
-
- *((__global int *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-__kernel void arithm_s_sub_with_mask_C1_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- float src_data1 = *((__global float *)((__global char *)src1 + src1_index));
- float src_data2 = src2.x;
- float dst_data = *((__global float *)((__global char *)dst + dst_index));
-
- float data = src_data1 - src_data2;
- data = isMatSubScalar ? data : -data;
- data = mask_data ? data : dst_data;
-
- *((__global float *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_with_mask_C1_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- double src_data1 = *((__global double *)((__global char *)src1 + src1_index));
- double src_data2 = src2.x;
- double dst_data = *((__global double *)((__global char *)dst + dst_index));
-
- double data = src_data1 - src_data2;
- data = isMatSubScalar ? data : -data;
- data = mask_data ? data : dst_data;
-
- *((__global double *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_s_sub_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align ((dst_offset >> 1) & 1)
- int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- int4 src2_data = (int4)(src2.x, src2.y, src2.x, src2.y);
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- uchar4 data = *((__global uchar4 *)(dst + dst_index));
- int4 tmp = convert_int4_sat(src1_data) - src2_data;
- tmp = isMatSubScalar ? tmp : -tmp;
- uchar4 tmp_data = convert_uchar4_sat(tmp);
-
- data.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data.xy : data.xy;
- data.zw = ((mask_data.y) && (dst_index + 2 < dst_end )) ? tmp_data.zw : data.zw;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort2 src_data1 = *((__global ushort2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- ushort2 dst_data = *((__global ushort2 *)((__global char *)dst + dst_index));
-
- int2 tmp = convert_int2_sat(src_data1) - src_data2;
- tmp = isMatSubScalar ? tmp : -tmp;
- ushort2 data = convert_ushort2_sat(tmp);
- data = mask_data ? data : dst_data;
-
- *((__global ushort2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short2 src_data1 = *((__global short2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- short2 dst_data = *((__global short2 *)((__global char *)dst + dst_index));
-
- int2 tmp = convert_int2_sat(src_data1) - src_data2;
- tmp = isMatSubScalar ? tmp : -tmp;
- short2 data = convert_short2_sat(tmp);
- data = mask_data ? data : dst_data;
-
- *((__global short2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int2 src_data1 = *((__global int2 *)((__global char *)src1 + src1_index));
- int2 src_data2 = (int2)(src2.x, src2.y);
- int2 dst_data = *((__global int2 *)((__global char *)dst + dst_index));
-
- long2 tmp = convert_long2_sat(src_data1) - convert_long2_sat(src_data2);
- tmp = isMatSubScalar ? tmp : -tmp;
- int2 data = convert_int2_sat(tmp);
- data = mask_data ? data : dst_data;
-
- *((__global int2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C2_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- float2 src_data1 = *((__global float2 *)((__global char *)src1 + src1_index));
- float2 src_data2 = (float2)(src2.x, src2.y);
- float2 dst_data = *((__global float2 *)((__global char *)dst + dst_index));
-
- float2 data = src_data1 - src_data2;
- data = isMatSubScalar ? data : -data;
- data = mask_data ? data : dst_data;
-
- *((__global float2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_with_mask_C2_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- double2 src_data1 = *((__global double2 *)((__global char *)src1 + src1_index));
- double2 src_data2 = (double2)(src2.x, src2.y);
- double2 dst_data = *((__global double2 *)((__global char *)dst + dst_index));
-
- double2 data = src_data1 - src_data2;
- data = isMatSubScalar ? data : -data;
- data = mask_data ? data : dst_data;
-
- *((__global double2 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
-__kernel void arithm_s_sub_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 2;
-
- #define dst_align (((dst_offset % dst_step) / 3 ) & 3)
- int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 3) - (dst_align * 3));
-
- uchar4 src1_data_0 = vload4(0, src1 + src1_index + 0);
- uchar4 src1_data_1 = vload4(0, src1 + src1_index + 4);
- uchar4 src1_data_2 = vload4(0, src1 + src1_index + 8);
-
- int4 src2_data_0 = (int4)(src2.x, src2.y, src2.z, src2.x);
- int4 src2_data_1 = (int4)(src2.y, src2.z, src2.x, src2.y);
- int4 src2_data_2 = (int4)(src2.z, src2.x, src2.y, src2.z);
-
- uchar4 mask_data = vload4(0, mask + mask_index);
-
- uchar4 data_0 = *((__global uchar4 *)(dst + dst_index + 0));
- uchar4 data_1 = *((__global uchar4 *)(dst + dst_index + 4));
- uchar4 data_2 = *((__global uchar4 *)(dst + dst_index + 8));
-
- int4 tmp_0 = convert_int4_sat(src1_data_0) - src2_data_0;
- int4 tmp_1 = convert_int4_sat(src1_data_1) - src2_data_1;
- int4 tmp_2 = convert_int4_sat(src1_data_2) - src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- uchar4 tmp_data_0 = convert_uchar4_sat(tmp_0);
- uchar4 tmp_data_1 = convert_uchar4_sat(tmp_1);
- uchar4 tmp_data_2 = convert_uchar4_sat(tmp_2);
-
- data_0.xyz = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xyz : data_0.xyz;
- data_0.w = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_0.w : data_0.w;
-
- data_1.xy = ((mask_data.y) && (dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end))
- ? tmp_data_1.xy : data_1.xy;
- data_1.zw = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.zw : data_1.zw;
-
- data_2.x = ((mask_data.z) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.x : data_2.x;
- data_2.yzw = ((mask_data.w) && (dst_index + 9 >= dst_start) && (dst_index + 9 < dst_end))
- ? tmp_data_2.yzw : data_2.yzw;
-
- *((__global uchar4 *)(dst + dst_index + 0)) = data_0;
- *((__global uchar4 *)(dst + dst_index + 4)) = data_1;
- *((__global uchar4 *)(dst + dst_index + 8)) = data_2;
- }
-}
-__kernel void arithm_s_sub_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- ushort2 src1_data_0 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 0));
- ushort2 src1_data_1 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 4));
- ushort2 src1_data_2 = vload2(0, (__global ushort *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- ushort2 data_0 = *((__global ushort2 *)((__global char *)dst + dst_index + 0));
- ushort2 data_1 = *((__global ushort2 *)((__global char *)dst + dst_index + 4));
- ushort2 data_2 = *((__global ushort2 *)((__global char *)dst + dst_index + 8));
-
- int2 tmp_0 = convert_int2_sat(src1_data_0) - src2_data_0;
- int2 tmp_1 = convert_int2_sat(src1_data_1) - src2_data_1;
- int2 tmp_2 = convert_int2_sat(src1_data_2) - src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- ushort2 tmp_data_0 = convert_ushort2_sat(tmp_0);
- ushort2 tmp_data_1 = convert_ushort2_sat(tmp_1);
- ushort2 tmp_data_2 = convert_ushort2_sat(tmp_2);
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_sub_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- x = x << 1;
-
- #define dst_align (((dst_offset % dst_step) / 6 ) & 1)
- int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
- int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
-
- int dst_start = mad24(y, dst_step, dst_offset);
- int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 6) - (dst_align * 6));
-
- short2 src1_data_0 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 0));
- short2 src1_data_1 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 4));
- short2 src1_data_2 = vload2(0, (__global short *)((__global char *)src1 + src1_index + 8));
-
- int2 src2_data_0 = (int2)(src2.x, src2.y);
- int2 src2_data_1 = (int2)(src2.z, src2.x);
- int2 src2_data_2 = (int2)(src2.y, src2.z);
-
- uchar2 mask_data = vload2(0, mask + mask_index);
-
- short2 data_0 = *((__global short2 *)((__global char *)dst + dst_index + 0));
- short2 data_1 = *((__global short2 *)((__global char *)dst + dst_index + 4));
- short2 data_2 = *((__global short2 *)((__global char *)dst + dst_index + 8));
-
- int2 tmp_0 = convert_int2_sat(src1_data_0) - src2_data_0;
- int2 tmp_1 = convert_int2_sat(src1_data_1) - src2_data_1;
- int2 tmp_2 = convert_int2_sat(src1_data_2) - src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- short2 tmp_data_0 = convert_short2_sat(tmp_0);
- short2 tmp_data_1 = convert_short2_sat(tmp_1);
- short2 tmp_data_2 = convert_short2_sat(tmp_2);
-
- data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy;
-
- data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end))
- ? tmp_data_1.x : data_1.x;
- data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_1.y : data_1.y;
-
- data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end))
- ? tmp_data_2.xy : data_2.xy;
-
- *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_sub_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- int src1_data_0 = *((__global int *)((__global char *)src1 + src1_index + 0));
- int src1_data_1 = *((__global int *)((__global char *)src1 + src1_index + 4));
- int src1_data_2 = *((__global int *)((__global char *)src1 + src1_index + 8));
-
- int src2_data_0 = src2.x;
- int src2_data_1 = src2.y;
- int src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- int data_0 = *((__global int *)((__global char *)dst + dst_index + 0));
- int data_1 = *((__global int *)((__global char *)dst + dst_index + 4));
- int data_2 = *((__global int *)((__global char *)dst + dst_index + 8));
-
- long tmp_0 = (long)src1_data_0 - (long)src2_data_0;
- long tmp_1 = (long)src1_data_1 - (long)src2_data_1;
- long tmp_2 = (long)src1_data_2 - (long)src2_data_2;
-
- tmp_0 = isMatSubScalar ? tmp_0 : -tmp_0;
- tmp_1 = isMatSubScalar ? tmp_1 : -tmp_1;
- tmp_2 = isMatSubScalar ? tmp_2 : -tmp_2;
-
- int tmp_data_0 = convert_int_sat(tmp_0);
- int tmp_data_1 = convert_int_sat(tmp_1);
- int tmp_data_2 = convert_int_sat(tmp_2);
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global int *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global int *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global int *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-__kernel void arithm_s_sub_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 12) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 12));
-
- float src1_data_0 = *((__global float *)((__global char *)src1 + src1_index + 0));
- float src1_data_1 = *((__global float *)((__global char *)src1 + src1_index + 4));
- float src1_data_2 = *((__global float *)((__global char *)src1 + src1_index + 8));
-
- float src2_data_0 = src2.x;
- float src2_data_1 = src2.y;
- float src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- float data_0 = *((__global float *)((__global char *)dst + dst_index + 0));
- float data_1 = *((__global float *)((__global char *)dst + dst_index + 4));
- float data_2 = *((__global float *)((__global char *)dst + dst_index + 8));
-
- float tmp_data_0 = src1_data_0 - src2_data_0;
- float tmp_data_1 = src1_data_1 - src2_data_1;
- float tmp_data_2 = src1_data_2 - src2_data_2;
-
- tmp_data_0 = isMatSubScalar ? tmp_data_0 : -tmp_data_0;
- tmp_data_1 = isMatSubScalar ? tmp_data_1 : -tmp_data_1;
- tmp_data_2 = isMatSubScalar ? tmp_data_2 : -tmp_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global float *)((__global char *)dst + dst_index + 0))= data_0;
- *((__global float *)((__global char *)dst + dst_index + 4))= data_1;
- *((__global float *)((__global char *)dst + dst_index + 8))= data_2;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_with_mask_C3_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x * 24) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, dst_offset + (x * 24));
-
- double src1_data_0 = *((__global double *)((__global char *)src1 + src1_index + 0 ));
- double src1_data_1 = *((__global double *)((__global char *)src1 + src1_index + 8 ));
- double src1_data_2 = *((__global double *)((__global char *)src1 + src1_index + 16));
-
- double src2_data_0 = src2.x;
- double src2_data_1 = src2.y;
- double src2_data_2 = src2.z;
-
- uchar mask_data = * (mask + mask_index);
-
- double data_0 = *((__global double *)((__global char *)dst + dst_index + 0 ));
- double data_1 = *((__global double *)((__global char *)dst + dst_index + 8 ));
- double data_2 = *((__global double *)((__global char *)dst + dst_index + 16));
-
- double tmp_data_0 = src1_data_0 - src2_data_0;
- double tmp_data_1 = src1_data_1 - src2_data_1;
- double tmp_data_2 = src1_data_2 - src2_data_2;
-
- tmp_data_0 = isMatSubScalar ? tmp_data_0 : -tmp_data_0;
- tmp_data_1 = isMatSubScalar ? tmp_data_1 : -tmp_data_1;
- tmp_data_2 = isMatSubScalar ? tmp_data_2 : -tmp_data_2;
-
- data_0 = mask_data ? tmp_data_0 : data_0;
- data_1 = mask_data ? tmp_data_1 : data_1;
- data_2 = mask_data ? tmp_data_2 : data_2;
-
- *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0;
- *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1;
- *((__global double *)((__global char *)dst + dst_index + 16))= data_2;
- }
-}
-#endif
-__kernel void arithm_s_sub_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
- __global uchar *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- uchar4 src_data1 = *((__global uchar4 *)(src1 + src1_index));
- uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
-
- int4 tmp = convert_int4_sat(src_data1) - src2;
- tmp = isMatSubScalar ? tmp : -tmp;
- uchar4 data = convert_uchar4_sat(tmp);
-
- data = mask_data ? data : dst_data;
-
- *((__global uchar4 *)(dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
- __global ushort *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- ushort4 src_data1 = *((__global ushort4 *)((__global char *)src1 + src1_index));
- ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
-
- int4 tmp = convert_int4_sat(src_data1) - src2;
- tmp = isMatSubScalar ? tmp : -tmp;
- ushort4 data = convert_ushort4_sat(tmp);
-
- data = mask_data ? data : dst_data;
-
- *((__global ushort4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset,
- __global short *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- short4 src_data1 = *((__global short4 *)((__global char *)src1 + src1_index));
- short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
-
- int4 tmp = convert_int4_sat(src_data1) - src2;
- tmp = isMatSubScalar ? tmp : -tmp;
- short4 data = convert_short4_sat(tmp);
-
- data = mask_data ? data : dst_data;
-
- *((__global short4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
- __global int *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- int4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- int4 src_data1 = *((__global int4 *)((__global char *)src1 + src1_index));
- int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index));
-
- long4 tmp = convert_long4_sat(src_data1) - convert_long4_sat(src2);
- tmp = isMatSubScalar ? tmp : -tmp;
- int4 data = convert_int4_sat(tmp);
-
- data = mask_data ? data : dst_data;
-
- *((__global int4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-__kernel void arithm_s_sub_with_mask_C4_D5 (__global float *src1, int src1_step, int src1_offset,
- __global float *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- float4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 4) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 4) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- float4 src_data1 = *((__global float4 *)((__global char *)src1 + src1_index));
- float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));
-
- float4 data = src_data1 - src2;
- data = isMatSubScalar ? data : -data;
-
- data = mask_data ? data : dst_data;
-
- *((__global float4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_s_sub_with_mask_C4_D6 (__global double *src1, int src1_step, int src1_offset,
- __global double *dst, int dst_step, int dst_offset,
- __global uchar *mask, int mask_step, int mask_offset,
- double4 src2, int rows, int cols, int dst_step1, int isMatSubScalar)
-{
-
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- if (x < cols && y < rows)
- {
- int src1_index = mad24(y, src1_step, (x << 5) + src1_offset);
- int mask_index = mad24(y, mask_step, x + mask_offset);
- int dst_index = mad24(y, dst_step, (x << 5) + dst_offset);
-
- uchar mask_data = *(mask + mask_index);
-
- double4 src_data1 = *((__global double4 *)((__global char *)src1 + src1_index));
- double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index));
-
- double4 data = src_data1 - src2;
- data = isMatSubScalar ? data : -data;
- data = mask_data ? data : dst_data;
-
- *((__global double4 *)((__global char *)dst + dst_index)) = data;
- }
-}
-#endif
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Nathan, liujun@multicorewareinc.com
+// Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
-#define MAX_FLOAT 1e7f
+#define MAX_FLOAT 3.40282e+038f
+
+#ifndef T
+#define T float
+#endif
+
+#ifndef BLOCK_SIZE
+#define BLOCK_SIZE 16
+#endif
+#ifndef MAX_DESC_LEN
+#define MAX_DESC_LEN 64
+#endif
+
+#ifndef DIST_TYPE
+#define DIST_TYPE 0
+#endif
-int bit1Count(float x)
+//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+int bit1Count(int v)
{
- int c = 0;
- int ix = (int)x;
- for (int i = 0 ; i < 32 ; i++)
- {
- c += ix & 0x1;
- ix >>= 1;
- }
- return (float)c;
+ v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
+ v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
+ return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
}
-float reduce_block(__local float *s_query,
- __local float *s_train,
- int block_size,
- int lidx,
- int lidy,
- int distType
- )
+// dirty fix for non-template support
+#if (DIST_TYPE == 0) // L1Dist
+# ifdef T_FLOAT
+# define DIST(x, y) fabs((x) - (y))
+ typedef float value_type;
+ typedef float result_type;
+# else
+# define DIST(x, y) abs((x) - (y))
+ typedef int value_type;
+ typedef int result_type;
+# endif
+#define DIST_RES(x) (x)
+#elif (DIST_TYPE == 1) // L2Dist
+#define DIST(x, y) (((x) - (y)) * ((x) - (y)))
+typedef float value_type;
+typedef float result_type;
+#define DIST_RES(x) sqrt(x)
+#elif (DIST_TYPE == 2) // Hamming
+#define DIST(x, y) bit1Count( (x) ^ (y) )
+typedef int value_type;
+typedef int result_type;
+#define DIST_RES(x) (x)
+#endif
+
+result_type reduce_block(
+ __local value_type *s_query,
+ __local value_type *s_train,
+ int lidx,
+ int lidy
+ )
{
- /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
- sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
- float result = 0;
- switch(distType)
+ result_type result = 0;
+ #pragma unroll
+ for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
- case 0:
- for (int j = 0 ; j < block_size ; j++)
- {
- result += fabs(s_query[lidy * block_size + j] - s_train[j * block_size + lidx]);
- }
- break;
- case 1:
- for (int j = 0 ; j < block_size ; j++)
- {
- float qr = s_query[lidy * block_size + j] - s_train[j * block_size + lidx];
- result += qr * qr;
- }
- break;
- case 2:
- for (int j = 0 ; j < block_size ; j++)
- {
- result += bit1Count((uint)s_query[lidy * block_size + j] ^ (uint)s_train[(uint)j * block_size + lidx]);
- }
- break;
+ result += DIST(
+ s_query[lidy * BLOCK_SIZE + j],
+ s_train[j * BLOCK_SIZE + lidx]);
}
- return result;
+ return DIST_RES(result);
}
-float reduce_multi_block(__local float *s_query,
- __local float *s_train,
- int max_desc_len,
- int block_size,
- int block_index,
- int lidx,
- int lidy,
- int distType
- )
+result_type reduce_multi_block(
+ __local value_type *s_query,
+ __local value_type *s_train,
+ int block_index,
+ int lidx,
+ int lidy
+ )
{
- /* there are threee types in the reducer. the first is L1Dist, which to sum the abs(v1, v2), the second is L2Dist, which to
- sum the (v1 - v2) * (v1 - v2), the third is humming, which to popc(v1 ^ v2), popc is to count the bits are set to 1*/
- float result = 0;
- switch(distType)
+ result_type result = 0;
+ #pragma unroll
+ for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
- case 0:
- for (int j = 0 ; j < block_size ; j++)
- {
- result += fabs(s_query[lidy * max_desc_len + block_index * block_size + j] - s_train[j * block_size + lidx]);
- }
- break;
- case 1:
- for (int j = 0 ; j < block_size ; j++)
- {
- float qr = s_query[lidy * max_desc_len + block_index * block_size + j] - s_train[j * block_size + lidx];
- result += qr * qr;
- }
- break;
- case 2:
- for (int j = 0 ; j < block_size ; j++)
- {
- //result += popcount((uint)s_query[lidy * max_desc_len + block_index * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
- result += bit1Count((uint)s_query[lidy * max_desc_len + block_index * block_size + j] ^ (uint)s_train[j * block_size + lidx]);
- }
- break;
+ result += DIST(
+ s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j],
+ s_train[j * BLOCK_SIZE + lidx]);
}
- return result;
+ return DIST_RES(result);
}
-/* 2dim launch, global size: dim0 is (query rows + block_size - 1) / block_size * block_size, dim1 is block_size
-local size: dim0 is block_size, dim1 is block_size.
+/* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE
+local size: dim0 is BLOCK_SIZE, dim1 is BLOCK_SIZE.
*/
-__kernel void BruteForceMatch_UnrollMatch_D5(
- __global float *query,
- __global float *train,
+__kernel void BruteForceMatch_UnrollMatch(
+ __global T *query,
+ __global T *train,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__local float *sharebuffer,
- int block_size,
- int max_desc_len,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
- __local float *s_query = sharebuffer;
- __local float *s_train = sharebuffer + block_size * max_desc_len;
+ __local value_type *s_query = (__local value_type *)sharebuffer;
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
- int queryIdx = groupidx * block_size + lidy;
+ int queryIdx = groupidx * BLOCK_SIZE + lidy;
// load the query into local memory.
- for (int i = 0 ; i < max_desc_len / block_size; i ++)
+ #pragma unroll
+ for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{
- int loadx = lidx + i * block_size;
- s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ int loadx = lidx + i * BLOCK_SIZE;
+ s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
}
float myBestDistance = MAX_FLOAT;
// loopUnrolledCached to find the best trainIdx and best distance.
volatile int imgIdx = 0;
- for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
+ for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; t++)
{
- float result = 0;
- for (int i = 0 ; i < max_desc_len / block_size ; i++)
+ result_type result = 0;
+ #pragma unroll
+ for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
- //load a block_size * block_size block into local train.
- const int loadx = lidx + i * block_size;
- s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
+ const int loadx = lidx + i * BLOCK_SIZE;
+ s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_multi_block(s_query, s_train, max_desc_len, block_size, i, lidx, lidy, distType);
+ result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
- int trainIdx = t * block_size + lidx;
+ int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
{
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float*)(sharebuffer);
- __local int* s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
+ __local int* s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
//find BestMatch
- s_distance += lidy * block_size;
- s_trainIdx += lidy * block_size;
+ s_distance += lidy * BLOCK_SIZE;
+ s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance;
s_trainIdx[lidx] = myBestTrainIdx;
barrier(CLK_LOCAL_MEM_FENCE);
//reduce -- now all reduce implement in each threads.
- for (int k = 0 ; k < block_size; k++)
+ #pragma unroll
+ for (int k = 0 ; k < BLOCK_SIZE; k++)
{
if (myBestDistance > s_distance[k])
{
}
}
-__kernel void BruteForceMatch_Match_D5(
- __global float *query,
- __global float *train,
+__kernel void BruteForceMatch_Match(
+ __global T *query,
+ __global T *train,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__local float *sharebuffer,
- int block_size,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
- const int queryIdx = groupidx * block_size + lidy;
+ const int queryIdx = groupidx * BLOCK_SIZE + lidy;
float myBestDistance = MAX_FLOAT;
int myBestTrainIdx = -1;
- __local float *s_query = sharebuffer;
- __local float *s_train = sharebuffer + block_size * block_size;
+ __local value_type *s_query = (__local value_type *)sharebuffer;
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
// loop
- for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
+ for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
- //Dist dist;
- float result = 0;
- for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; i++)
+ result_type result = 0;
+ for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; i++)
{
- const int loadx = lidx + i * block_size;
+ const int loadx = lidx + i * BLOCK_SIZE;
//load query and train into local memory
- s_query[lidy * block_size + lidx] = 0;
- s_train[lidx * block_size + lidy] = 0;
+ s_query[lidy * BLOCK_SIZE + lidx] = 0;
+ s_train[lidx * BLOCK_SIZE + lidy] = 0;
if (loadx < query_cols)
{
- s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
- s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
+ s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
+ s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
}
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, block_size, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
- const int trainIdx = t * block_size + lidx;
+ const int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
{
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float *)sharebuffer;
- __local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
+ __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
//findBestMatch
- s_distance += lidy * block_size;
- s_trainIdx += lidy * block_size;
+ s_distance += lidy * BLOCK_SIZE;
+ s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance;
s_trainIdx[lidx] = myBestTrainIdx;
barrier(CLK_LOCAL_MEM_FENCE);
//reduce -- now all reduce implement in each threads.
- for (int k = 0 ; k < block_size; k++)
+ for (int k = 0 ; k < BLOCK_SIZE; k++)
{
if (myBestDistance > s_distance[k])
{
}
//radius_unrollmatch
-__kernel void BruteForceMatch_RadiusUnrollMatch_D5(
- __global float *query,
- __global float *train,
+__kernel void BruteForceMatch_RadiusUnrollMatch(
+ __global T *query,
+ __global T *train,
float maxDistance,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__global int *nMatches,
__local float *sharebuffer,
- int block_size,
- int max_desc_len,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int bestTrainIdx_cols,
int step,
- int ostep,
- int distType
+ int ostep
)
{
const int lidx = get_local_id(0);
const int groupidx = get_group_id(0);
const int groupidy = get_group_id(1);
- const int queryIdx = groupidy * block_size + lidy;
- const int trainIdx = groupidx * block_size + lidx;
+ const int queryIdx = groupidy * BLOCK_SIZE + lidy;
+ const int trainIdx = groupidx * BLOCK_SIZE + lidx;
- __local float *s_query = sharebuffer;
- __local float *s_train = sharebuffer + block_size * block_size;
+ __local value_type *s_query = (__local value_type *)sharebuffer;
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
- float result = 0;
- for (int i = 0 ; i < max_desc_len / block_size ; ++i)
+ result_type result = 0;
+ for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; ++i)
{
- //load a block_size * block_size block into local train.
- const int loadx = lidx + i * block_size;
+ //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
+ const int loadx = lidx + i * BLOCK_SIZE;
- s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
- s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, block_size, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
- if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
+ if (queryIdx < query_rows && trainIdx < train_rows &&
+ convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
{
unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
}
//radius_match
-__kernel void BruteForceMatch_RadiusMatch_D5(
- __global float *query,
- __global float *train,
+__kernel void BruteForceMatch_RadiusMatch(
+ __global T *query,
+ __global T *train,
float maxDistance,
//__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__global int *nMatches,
__local float *sharebuffer,
- int block_size,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
int bestTrainIdx_cols,
int step,
- int ostep,
- int distType
+ int ostep
)
{
const int lidx = get_local_id(0);
const int groupidx = get_group_id(0);
const int groupidy = get_group_id(1);
- const int queryIdx = groupidy * block_size + lidy;
- const int trainIdx = groupidx * block_size + lidx;
+ const int queryIdx = groupidy * BLOCK_SIZE + lidy;
+ const int trainIdx = groupidx * BLOCK_SIZE + lidx;
- __local float *s_query = sharebuffer;
- __local float *s_train = sharebuffer + block_size * block_size;
+ __local value_type *s_query = (__local value_type *)sharebuffer;
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
- float result = 0;
- for (int i = 0 ; i < (query_cols + block_size - 1) / block_size ; ++i)
+ result_type result = 0;
+ for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)
{
- //load a block_size * block_size block into local train.
- const int loadx = lidx + i * block_size;
+ //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
+ const int loadx = lidx + i * BLOCK_SIZE;
- s_query[lidy * block_size + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
- s_train[lidx * block_size + lidy] = loadx < query_cols ? train[min(groupidx * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, block_size, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
- if (queryIdx < query_rows && trainIdx < train_rows && result < maxDistance/* && mask(queryIdx, trainIdx)*/)
+ if (queryIdx < query_rows && trainIdx < train_rows &&
+ convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
{
unsigned int ind = atom_inc(nMatches + queryIdx);
}
-__kernel void BruteForceMatch_knnUnrollMatch_D5(
- __global float *query,
- __global float *train,
+__kernel void BruteForceMatch_knnUnrollMatch(
+ __global T *query,
+ __global T *train,
//__global float *mask,
__global int2 *bestTrainIdx,
__global float2 *bestDistance,
__local float *sharebuffer,
- int block_size,
- int max_desc_len,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
- const int queryIdx = groupidx * block_size + lidy;
- local float *s_query = sharebuffer;
- local float *s_train = sharebuffer + block_size * max_desc_len;
+ const int queryIdx = groupidx * BLOCK_SIZE + lidy;
+ __local value_type *s_query = (__local value_type *)sharebuffer;
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
// load the query into local memory.
- for (int i = 0 ; i < max_desc_len / block_size; i ++)
+ for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{
- int loadx = lidx + i * block_size;
- s_query[lidy * max_desc_len + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ int loadx = lidx + i * BLOCK_SIZE;
+ s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
}
float myBestDistance1 = MAX_FLOAT;
//loopUnrolledCached
volatile int imgIdx = 0;
- for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
+ for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
- float result = 0;
- for (int i = 0 ; i < max_desc_len / block_size ; i++)
+ result_type result = 0;
+ for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
- const int loadX = lidx + i * block_size;
- //load a block_size * block_size block into local train.
- const int loadx = lidx + i * block_size;
- s_train[lidx * block_size + lidy] = loadx < train_cols ? train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
+ const int loadx = lidx + i * BLOCK_SIZE;
+ s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_multi_block(s_query, s_train, max_desc_len, block_size, i, lidx, lidy, distType);
+ result += reduce_multi_block(s_query, s_train, i, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
- const int trainIdx = t * block_size + lidx;
+ const int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows)
{
barrier(CLK_LOCAL_MEM_FENCE);
- local float *s_distance = (local float *)sharebuffer;
- local int *s_trainIdx = (local int *)(sharebuffer + block_size * block_size);
+ __local float *s_distance = (local float *)sharebuffer;
+ __local int *s_trainIdx = (local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
// find BestMatch
- s_distance += lidy * block_size;
- s_trainIdx += lidy * block_size;
+ s_distance += lidy * BLOCK_SIZE;
+ s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance1;
s_trainIdx[lidx] = myBestTrainIdx1;
if (lidx == 0)
{
- for (int i = 0 ; i < block_size ; i++)
+ for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
if (val < bestDistance1)
if (lidx == 0)
{
- for (int i = 0 ; i < block_size ; i++)
+ for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
}
}
-__kernel void BruteForceMatch_knnMatch_D5(
- __global float *query,
- __global float *train,
+__kernel void BruteForceMatch_knnMatch(
+ __global T *query,
+ __global T *train,
//__global float *mask,
__global int2 *bestTrainIdx,
__global float2 *bestDistance,
__local float *sharebuffer,
- int block_size,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType
+ int step
)
{
const int lidx = get_local_id(0);
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
- const int queryIdx = groupidx * block_size + lidy;
- local float *s_query = sharebuffer;
- local float *s_train = sharebuffer + block_size * block_size;
+ const int queryIdx = groupidx * BLOCK_SIZE + lidy;
+ __local value_type *s_query = (__local value_type *)sharebuffer;
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
float myBestDistance1 = MAX_FLOAT;
float myBestDistance2 = MAX_FLOAT;
int myBestTrainIdx2 = -1;
//loop
- for (int t = 0 ; t < (train_rows + block_size - 1) / block_size ; t++)
+ for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
{
- float result = 0.0f;
- for (int i = 0 ; i < (query_cols + block_size -1) / block_size ; i++)
+ result_type result = 0.0f;
+ for (int i = 0 ; i < (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE ; i++)
{
- const int loadx = lidx + i * block_size;
+ const int loadx = lidx + i * BLOCK_SIZE;
//load query and train into local memory
- s_query[lidy * block_size + lidx] = 0;
- s_train[lidx * block_size + lidy] = 0;
+ s_query[lidy * BLOCK_SIZE + lidx] = 0;
+ s_train[lidx * BLOCK_SIZE + lidy] = 0;
if (loadx < query_cols)
{
- s_query[lidy * block_size + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
- s_train[lidx * block_size + lidy] = train[min(t * block_size + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
+ s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
+ s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
}
barrier(CLK_LOCAL_MEM_FENCE);
- result += reduce_block(s_query, s_train, block_size, lidx, lidy, distType);
+ result += reduce_block(s_query, s_train, lidx, lidy);
barrier(CLK_LOCAL_MEM_FENCE);
}
- const int trainIdx = t * block_size + lidx;
+ const int trainIdx = t * BLOCK_SIZE + lidx;
if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
{
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float *)sharebuffer;
- __local int *s_trainIdx = (__local int *)(sharebuffer + block_size * block_size);
+ __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
//findBestMatch
- s_distance += lidy * block_size;
- s_trainIdx += lidy * block_size;
+ s_distance += lidy * BLOCK_SIZE;
+ s_trainIdx += lidy * BLOCK_SIZE;
s_distance[lidx] = myBestDistance1;
s_trainIdx[lidx] = myBestTrainIdx1;
if (lidx == 0)
{
- for (int i = 0 ; i < block_size ; i++)
+ for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
if (val < bestDistance1)
if (lidx == 0)
{
- for (int i = 0 ; i < block_size ; i++)
+ for (int i = 0 ; i < BLOCK_SIZE ; i++)
{
float val = s_distance[i];
}
}
-kernel void BruteForceMatch_calcDistanceUnrolled_D5(
- __global float *query,
- __global float *train,
+kernel void BruteForceMatch_calcDistanceUnrolled(
+ __global T *query,
+ __global T *train,
//__global float *mask,
__global float *allDist,
__local float *sharebuffer,
- int block_size,
- int max_desc_len,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType)
+ int step)
{
/* Todo */
}
-kernel void BruteForceMatch_calcDistance_D5(
- __global float *query,
- __global float *train,
+kernel void BruteForceMatch_calcDistance(
+ __global T *query,
+ __global T *train,
//__global float *mask,
__global float *allDist,
__local float *sharebuffer,
- int block_size,
int query_rows,
int query_cols,
int train_rows,
int train_cols,
- int step,
- int distType)
+ int step)
{
/* Todo */
}
-kernel void BruteForceMatch_findBestMatch_D5(
+kernel void BruteForceMatch_findBestMatch(
__global float *allDist,
__global int *bestTrainIdx,
__global float *bestDistance,
- int k,
- int block_size
+ int k
)
{
/* Todo */
-}
\ No newline at end of file
+}
***********************************************************************************/
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
- (__global const uchar * restrict src,
- __global float * dst,
- const int dst_cols,
- const int dst_rows,
- const int src_whole_cols,
- const int src_whole_rows,
- const int src_step_in_pixel,
- const int src_offset_x,
- const int src_offset_y,
- const int dst_step_in_pixel,
- const int radiusy,
- __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
+(__global const uchar * restrict src,
+ __global float * dst,
+ const int dst_cols,
+ const int dst_rows,
+ const int src_whole_cols,
+ const int src_whole_rows,
+ const int src_step_in_pixel,
+ const int src_offset_x,
+ const int src_offset_y,
+ const int dst_step_in_pixel,
+ const int radiusy,
+ __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0)<<2;
int y = get_global_id(1);
uchar4 temp[READ_TIMES_ROW];
__local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0*4;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = *(__global uchar4*)&src[current_addr];
}
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
temp[i].x= ELEM(start_x+i*LSIZE0*4,0,src_whole_cols,0,temp[i].x);
temp[i].y= ELEM(start_x+i*LSIZE0*4+1,0,src_whole_cols,0,temp[i].y);
temp[i].w= ELEM(start_x+i*LSIZE0*4+3,0,src_whole_cols,0,temp[i].w);
temp[i]= ELEM(start_y,0,src_whole_rows,(uchar4)0,temp[i]);
}
- #else
+#else
int not_all_in_range = (start_x<0) | (start_x + READ_TIMES_ROW*LSIZE0*4+4>src_whole_cols)| (start_y<0) | (start_y >= src_whole_rows);
int4 index[READ_TIMES_ROW];
int4 addr;
if(not_all_in_range)
{
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
index[i].x= ADDR_L(start_x+i*LSIZE0*4,0,src_whole_cols,start_x+i*LSIZE0*4);
index[i].x= ADDR_R(start_x+i*LSIZE0*4,src_whole_cols,index[i].x);
s_y= ADDR_L(start_y,0,src_whole_rows,start_y);
s_y= ADDR_R(start_y,src_whole_rows,s_y);
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
addr = mad24((int4)s_y,(int4)src_step_in_pixel,index[i]);
temp[i].x = src[addr.x];
else
{
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
temp[i] = *(__global uchar4*)&src[start_addr+i*LSIZE0*4];
}
}
- #endif
+#endif
//save pixels to lds
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
}
//read pixels from lds and calculate the result
sum =convert_float4(vload4(0,(__local uchar*)&LDS_DAT[l_y][l_x]+RADIUSX+offset))*mat_kernel[RADIUSX];
- for(i=1;i<=RADIUSX;i++)
+ for(i=1; i<=RADIUSX; i++)
{
temp[0]=vload4(0,(__local uchar*)&LDS_DAT[l_y][l_x]+RADIUSX+offset-i);
temp[1]=vload4(0,(__local uchar*)&LDS_DAT[l_y][l_x]+RADIUSX+offset+i);
}
}
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D0
- (__global const uchar4 * restrict src,
- __global float4 * dst,
- const int dst_cols,
- const int dst_rows,
- const int src_whole_cols,
- const int src_whole_rows,
- const int src_step_in_pixel,
- const int src_offset_x,
- const int src_offset_y,
- const int dst_step_in_pixel,
- const int radiusy,
- __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
+(__global const uchar4 * restrict src,
+ __global float4 * dst,
+ const int dst_cols,
+ const int dst_rows,
+ const int src_whole_cols,
+ const int src_whole_rows,
+ const int src_step_in_pixel,
+ const int src_offset_x,
+ const int src_offset_y,
+ const int dst_step_in_pixel,
+ const int radiusy,
+ __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
uchar4 temp[READ_TIMES_ROW];
__local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = src[current_addr];
}
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,(uchar4)0,temp[i]);
temp[i]= ELEM(start_y,0,src_whole_rows,(uchar4)0,temp[i]);
}
- #else
+#else
int index[READ_TIMES_ROW];
int s_x,s_y;
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
s_x= ADDR_L(start_x+i*LSIZE0,0,src_whole_cols,start_x+i*LSIZE0);
s_x= ADDR_R(start_x+i*LSIZE0,src_whole_cols,s_x);
index[i]=mad24(s_y,src_step_in_pixel,s_x);
}
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
temp[i] = src[index[i]];
}
- #endif
+#endif
//save pixels to lds
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
}
//read pixels from lds and calculate the result
sum =convert_float4(LDS_DAT[l_y][l_x+RADIUSX])*mat_kernel[RADIUSX];
- for(i=1;i<=RADIUSX;i++)
+ for(i=1; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
}
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D5
- (__global const float * restrict src,
- __global float * dst,
- const int dst_cols,
- const int dst_rows,
- const int src_whole_cols,
- const int src_whole_rows,
- const int src_step_in_pixel,
- const int src_offset_x,
- const int src_offset_y,
- const int dst_step_in_pixel,
- const int radiusy,
- __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
+(__global const float * restrict src,
+ __global float * dst,
+ const int dst_cols,
+ const int dst_rows,
+ const int src_whole_cols,
+ const int src_whole_rows,
+ const int src_step_in_pixel,
+ const int src_offset_x,
+ const int src_offset_y,
+ const int dst_step_in_pixel,
+ const int radiusy,
+ __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
float temp[READ_TIMES_ROW];
__local float LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = src[current_addr];
}
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
- temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,0,temp[i]);
- temp[i]= ELEM(start_y,0,src_whole_rows,0,temp[i]);
+ temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,(float)0,temp[i]);
+ temp[i]= ELEM(start_y,0,src_whole_rows,(float)0,temp[i]);
}
- #else
+#else
int index[READ_TIMES_ROW];
int s_x,s_y;
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
s_x= ADDR_L(start_x+i*LSIZE0,0,src_whole_cols,start_x+i*LSIZE0);
s_x= ADDR_R(start_x+i*LSIZE0,src_whole_cols,s_x);
index[i]=mad24(s_y,src_step_in_pixel,s_x);
}
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
temp[i] = src[index[i]];
}
- #endif
+#endif
//save pixels to lds
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
}
//read pixels from lds and calculate the result
sum =LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX];
- for(i=1;i<=RADIUSX;i++)
+ for(i=1; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
}
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D5
- (__global const float4 * restrict src,
- __global float4 * dst,
- const int dst_cols,
- const int dst_rows,
- const int src_whole_cols,
- const int src_whole_rows,
- const int src_step_in_pixel,
- const int src_offset_x,
- const int src_offset_y,
- const int dst_step_in_pixel,
- const int radiusy,
- __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
+(__global const float4 * restrict src,
+ __global float4 * dst,
+ const int dst_cols,
+ const int dst_rows,
+ const int src_whole_cols,
+ const int src_whole_rows,
+ const int src_step_in_pixel,
+ const int src_offset_x,
+ const int src_offset_y,
+ const int dst_step_in_pixel,
+ const int radiusy,
+ __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
float4 temp[READ_TIMES_ROW];
__local float4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = src[current_addr];
}
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
- temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,0,temp[i]);
- temp[i]= ELEM(start_y,0,src_whole_rows,0,temp[i]);
+ temp[i]= ELEM(start_x+i*LSIZE0,0,src_whole_cols,(float4)0,temp[i]);
+ temp[i]= ELEM(start_y,0,src_whole_rows,(float4)0,temp[i]);
}
- #else
+#else
int index[READ_TIMES_ROW];
int s_x,s_y;
//judge if read out of boundary
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
s_x= ADDR_L(start_x+i*LSIZE0,0,src_whole_cols,start_x+i*LSIZE0);
s_x= ADDR_R(start_x+i*LSIZE0,src_whole_cols,s_x);
index[i]=mad24(s_y,src_step_in_pixel,s_x);
}
//read pixels from src
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
temp[i] = src[index[i]];
}
- #endif
+#endif
//save pixels to lds
- for(i = 0;i<READ_TIMES_ROW;i++)
+ for(i = 0; i<READ_TIMES_ROW; i++)
{
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
}
//read pixels from lds and calculate the result
sum =LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX];
- for(i=1;i<=RADIUSX;i++)
+ for(i=1; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
start_addr = mad24(y,dst_step_in_pixel,x);
dst[start_addr] = sum;
}
+
}
+
+
int groupX_size = get_local_size(0);
int groupX_id = get_group_id(0);
- #define dst_align (dst_offset_x & 3)
+#define dst_align (dst_offset_x & 3)
int cols_start_index_group = src_offset_x - dst_align + groupX_size * groupX_id - ANX;
int rows_start_index = src_offset_y + (gY << ROWS_PER_GROUP_BITS) - ANY;
{
if((rows_start_index - src_offset_y) + i < rows + ANY)
{
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int selected_row = rows_start_index + i;
int selected_cols = cols_start_index_group + lX;
data = con ? data : 0;
local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
}
- #else
+#else
int selected_row = ADDR_H(rows_start_index + i, 0, wholerows);
selected_row = ADDR_B(rows_start_index + i, wholerows, selected_row);
data = *(src + selected_row * src_step + selected_cols);
local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
}
- #endif
+#endif
}
}
}
for(int i = 0; i < ANCHOR; i++)
{
- #pragma unroll 3
- for(int j = 0; j < ANCHOR; j++)
- {
+#pragma unroll 3
+ for(int j = 0; j < ANCHOR; j++)
+ {
if(dst_rows_index < dst_rows_end)
{
- int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
- int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
+ int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
+ int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
- data = vload4(0, local_data+local_row * LOCAL_MEM_STEP + local_cols);
- sum = sum + (mat_kernel[i * ANCHOR + j] * convert_int4_sat(data));
- }
+ data = vload4(0, local_data+local_row * LOCAL_MEM_STEP + local_cols);
+ sum = sum + (mat_kernel[i * ANCHOR + j] * convert_int4_sat(data));
+ }
}
}
sum.w = ((dst_cols_index + 3 >= dst_cols_start) && (dst_cols_index + 3 < dst_cols_end)) ? sum.w : dst_data.w;
*((__global uchar4 *)(dst + dst_rows_index * dst_step + dst_cols_index)) = convert_uchar4_sat(sum);
}
- }
+ }
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////32FC1////////////////////////////////////////////////////////
int groupX_size = get_local_size(0);
int groupX_id = get_group_id(0);
- #define dst_align (dst_offset_x & 3)
+#define dst_align (dst_offset_x & 3)
int cols_start_index_group = src_offset_x - dst_align + groupX_size * groupX_id - ANX;
int rows_start_index = src_offset_y + (gY << ROWS_PER_GROUP_BITS) - ANY;
{
if((rows_start_index - src_offset_y) + i < rows + ANY)
{
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int selected_row = rows_start_index + i;
int selected_cols = cols_start_index_group + lX;
data = con ? data : 0;
local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
}
- #else
+#else
int selected_row = ADDR_H(rows_start_index + i, 0, wholerows);
selected_row = ADDR_B(rows_start_index + i, wholerows, selected_row);
data = *((__global float *)((__global char *)src + selected_row * src_step + (selected_cols << 2)));
local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
}
- #endif
+#endif
}
}
}
for(int i = 0; i < ANCHOR; i++)
{
- #pragma unroll 3
- for(int j = 0; j < ANCHOR; j++)
- {
+#pragma unroll 3
+ for(int j = 0; j < ANCHOR; j++)
+ {
if(dst_rows_index < dst_rows_end)
{
- int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
- int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
+ int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
+ int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
- data = vload4(0, local_data+local_row * LOCAL_MEM_STEP + local_cols);
- sum = sum + (mat_kernel[i * ANCHOR + j] * data);
- }
+ data = vload4(0, local_data+local_row * LOCAL_MEM_STEP + local_cols);
+ sum = sum + ((float)(mat_kernel[i * ANCHOR + j]) * data);
+ }
}
}
*((__global float4 *)((__global char *)dst + dst_rows_index * dst_step + (dst_cols_index << 2))) = sum;
}
- }
+ }
}
///////////////////////////////////////////////////////////////////////////////////////////////////
int groupX_size = get_local_size(0);
int groupX_id = get_group_id(0);
- #define dst_align (dst_offset_x & 3)
+#define dst_align (dst_offset_x & 3)
int cols_start_index_group = src_offset_x - dst_align + groupX_size * groupX_id - ANX;
int rows_start_index = src_offset_y + (gY << ROWS_PER_GROUP_BITS) - ANY;
{
if((rows_start_index - src_offset_y) + i < rows + ANY)
{
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int selected_row = rows_start_index + i;
int selected_cols = cols_start_index_group + lX;
data = con ? data : 0;
local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
}
- #else
+#else
int selected_row = ADDR_H(rows_start_index + i, 0, wholerows);
selected_row = ADDR_B(rows_start_index + i, wholerows, selected_row);
data = *((__global uchar4*)((__global char*)src + selected_row * src_step + (selected_cols << 2)));
local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
}
- #endif
+#endif
}
}
}
for(int i = 0; i < ANCHOR; i++)
{
- #pragma unroll 3
- for(int j = 0; j < ANCHOR; j++)
- {
+#pragma unroll 3
+ for(int j = 0; j < ANCHOR; j++)
+ {
if(dst_rows_index < dst_rows_end)
{
- int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
- int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
+ int local_row = (lX >> THREADS_PER_ROW_BIT) + i;
+ int local_cols = ((lX % THREADS_PER_ROW) << ELEMENTS_PER_THREAD_BIT) + j;
- data = vload16(0, (__local uchar *)(local_data+local_row * LOCAL_MEM_STEP + local_cols));
- sum = sum + (mat_kernel[i * ANCHOR + j] * convert_int16_sat(data));
- }
+ data = vload16(0, (__local uchar *)(local_data+local_row * LOCAL_MEM_STEP + local_cols));
+ sum = sum + (mat_kernel[i * ANCHOR + j] * convert_int16_sat(data));
+ }
}
}
{
if((rows_start_index - src_offset_y) + i < rows + ANY)
{
- #ifdef BORDER_CONSTANT
+#ifdef BORDER_CONSTANT
int selected_row = rows_start_index + i;
int selected_cols = cols_start_index_group + lX;
data = con ? data : 0;
local_data[i * LOCAL_MEM_STEP + lX + groupX_size] =data;
}
- #else
+#else
int selected_row = ADDR_H(rows_start_index + i, 0, wholerows);
selected_row = ADDR_B(rows_start_index + i, wholerows, selected_row);
data = *((__global float4*)((__global char*)src + selected_row * src_step + (selected_cols << 4)));
local_data[i * LOCAL_MEM_STEP_C4 + lX + groupX_size] =data;
}
- #endif
+#endif
}
}
}
for(int i = 0; i < ANCHOR; i++)
{
- for(int j = 0; j < ANCHOR; j++)
- {
- int local_cols = lX + j;
- sum = sum + mat_kernel[i * ANCHOR + j] * local_data[i * LOCAL_MEM_STEP_C4 + local_cols];
+ for(int j = 0; j < ANCHOR; j++)
+ {
+ int local_cols = lX + j;
+ sum = sum + ((float)mat_kernel[i * ANCHOR + j] * local_data[i * LOCAL_MEM_STEP_C4 + local_cols]);
}
}
// dx_buf output dx buffer
// dy_buf output dy buffer
__kernel
- void calcSobelRowPass
- (
+void
+__attribute__((reqd_work_group_size(16,16,1)))
+calcSobelRowPass
+(
__global const uchar * src,
__global int * dx_buf,
__global int * dy_buf,
int dx_buf_offset,
int dy_buf_step,
int dy_buf_offset
- )
+)
{
- //src_step /= sizeof(*src);
- //src_offset /= sizeof(*src);
dx_buf_step /= sizeof(*dx_buf);
dx_buf_offset /= sizeof(*dx_buf);
dy_buf_step /= sizeof(*dy_buf);
__local int smem[16][18];
- smem[lidy][lidx + 1] = src[gidx + gidy * src_step + src_offset];
+ smem[lidy][lidx + 1] =
+ src[gidx + min(gidy, rows - 1) * src_step + src_offset];
if(lidx == 0)
{
- smem[lidy][0] = src[max(gidx - 1, 0) + gidy * src_step + src_offset];
- smem[lidy][17] = src[min(gidx + 16, cols - 1) + gidy * src_step + src_offset];
+ smem[lidy][0] =
+ src[max(gidx - 1, 0) + min(gidy, rows - 1) * src_step + src_offset];
+ smem[lidy][17] =
+ src[min(gidx + 16, cols - 1) + min(gidy, rows - 1) * src_step + src_offset];
}
barrier(CLK_LOCAL_MEM_FENCE);
- if(gidy < rows)
+ if(gidy < rows && gidx < cols)
{
-
- if(gidx < cols)
- {
- dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
- -smem[lidy][lidx] + smem[lidy][lidx + 2];
- dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
- smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
- }
+ dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
+ -smem[lidy][lidx] + smem[lidy][lidx + 2];
+ dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
+ smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
}
}
// dy direvitive in y direction output
// mag magnitude direvitive of xy output
__kernel
- void calcMagnitude_buf
- (
+void
+__attribute__((reqd_work_group_size(16,16,1)))
+calcMagnitude_buf
+(
__global const int * dx_buf,
__global const int * dy_buf,
__global int * dx,
int dy_offset,
int mag_step,
int mag_offset
- )
+)
{
dx_buf_step /= sizeof(*dx_buf);
dx_buf_offset /= sizeof(*dx_buf);
__local int sdx[18][16];
__local int sdy[18][16];
- sdx[lidy + 1][lidx] = dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset];
- sdy[lidy + 1][lidx] = dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset];
+ sdx[lidy + 1][lidx] =
+ dx_buf[gidx + min(gidy, rows - 1) * dx_buf_step + dx_buf_offset];
+ sdy[lidy + 1][lidx] =
+ dy_buf[gidx + min(gidy, rows - 1) * dy_buf_step + dy_buf_offset];
if(lidy == 0)
{
- sdx[0][lidx] = dx_buf[gidx + max(gidy - 1, 0) * dx_buf_step + dx_buf_offset];
- sdx[17][lidx] = dx_buf[gidx + min(gidy + 16, rows - 1) * dx_buf_step + dx_buf_offset];
-
- sdy[0][lidx] = dy_buf[gidx + max(gidy - 1, 0) * dy_buf_step + dy_buf_offset];
- sdy[17][lidx] = dy_buf[gidx + min(gidy + 16, rows - 1) * dy_buf_step + dy_buf_offset];
+ sdx[0][lidx] =
+ dx_buf[gidx + min(max(gidy-1,0),rows-1) * dx_buf_step + dx_buf_offset];
+ sdx[17][lidx] =
+ dx_buf[gidx + min(gidy + 16, rows - 1) * dx_buf_step + dx_buf_offset];
+
+ sdy[0][lidx] =
+ dy_buf[gidx + min(max(gidy-1,0),rows-1) * dy_buf_step + dy_buf_offset];
+ sdy[17][lidx] =
+ dy_buf[gidx + min(gidy + 16, rows - 1) * dy_buf_step + dy_buf_offset];
}
barrier(CLK_LOCAL_MEM_FENCE);
- if(gidx < cols)
+ if(gidx < cols && gidy < rows)
{
- if(gidy < rows)
- {
- int x = sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
- int y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];
+ int x = sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
+ int y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];
- dx[gidx + gidy * dx_step + dx_offset] = x;
- dy[gidx + gidy * dy_step + dy_offset] = y;
+ dx[gidx + gidy * dx_step + dx_offset] = x;
+ dy[gidx + gidy * dy_step + dy_offset] = y;
- mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
- }
+ mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
}
}
// dy direvitive in y direction output
// mag magnitude direvitive of xy output
__kernel
- void calcMagnitude
- (
+void calcMagnitude
+(
__global const int * dx,
__global const int * dy,
__global float * mag,
int dy_offset,
int mag_step,
int mag_offset
- )
+)
{
dx_step /= sizeof(*dx);
dx_offset /= sizeof(*dx);
{
mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] =
calc(
- dx[gidx + gidy * dx_step + dx_offset],
- dy[gidx + gidy * dy_step + dy_offset]
- );
+ dx[gidx + gidy * dx_step + dx_offset],
+ dy[gidx + gidy * dy_step + dy_offset]
+ );
}
}
// mag magnitudes calculated from calcMagnitude function
// map output containing raw edge types
__kernel
- void calcMap
- (
+void
+__attribute__((reqd_work_group_size(16,16,1)))
+calcMap
+(
__global const int * dx,
__global const int * dy,
__global const float * mag,
int mag_offset,
int map_step,
int map_offset
- )
+)
{
dx_step /= sizeof(*dx);
dx_offset /= sizeof(*dx);
int ly = tid / 18;
if(ly < 14)
{
- smem[ly][lx] = mag[grp_idx + lx + (grp_idy + ly) * mag_step];
- }
- if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
- {
- smem[ly + 14][lx] = mag[grp_idx + lx + (grp_idy + ly + 14) * mag_step];
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if(gidy < rows && gidx < cols)
- {
- int x = dx[gidx + gidy * dx_step];
- int y = dy[gidx + gidy * dy_step];
- const int s = (x ^ y) < 0 ? -1 : 1;
- const float m = smem[lidy + 1][lidx + 1];
- x = abs(x);
- y = abs(y);
-
- // 0 - the pixel can not belong to an edge
- // 1 - the pixel might belong to an edge
- // 2 - the pixel does belong to an edge
- int edge_type = 0;
- if(m > low_thresh)
- {
- const int tg22x = x * TG22;
- const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
- y <<= CANNY_SHIFT;
- if(y < tg22x)
- {
- if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else if (y > tg67x)
- {
- if(m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else
- {
- if(m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- }
- map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
- }
-}
-
-// non local memory version
-__kernel
- void calcMap_2
- (
- __global const int * dx,
- __global const int * dy,
- __global const float * mag,
- __global int * map,
- int rows,
- int cols,
- float low_thresh,
- float high_thresh,
- int dx_step,
- int dx_offset,
- int dy_step,
- int dy_offset,
- int mag_step,
- int mag_offset,
- int map_step,
- int map_offset
- )
-{
- dx_step /= sizeof(*dx);
- dx_offset /= sizeof(*dx);
- dy_step /= sizeof(*dy);
- dy_offset /= sizeof(*dy);
- mag_step /= sizeof(*mag);
- mag_offset /= sizeof(*mag);
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
-
-
- int gidx = get_global_id(0);
- int gidy = get_global_id(1);
-
- if(gidy < rows && gidx < cols)
- {
- int x = dx[gidx + gidy * dx_step];
- int y = dy[gidx + gidy * dy_step];
- const int s = (x ^ y) < 0 ? -1 : 1;
- const float m = mag[gidx + 1 + (gidy + 1) * mag_step];
- x = abs(x);
- y = abs(y);
-
- // 0 - the pixel can not belong to an edge
- // 1 - the pixel might belong to an edge
- // 2 - the pixel does belong to an edge
- int edge_type = 0;
- if(m > low_thresh)
- {
- const int tg22x = x * TG22;
- const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
- y <<= CANNY_SHIFT;
- if(y < tg22x)
- {
- if(m > mag[gidx + (gidy + 1) * mag_step] && m >= mag[gidx + 2 + (gidy + 1) * mag_step])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else if (y > tg67x)
- {
- if(m > mag[gidx + 1 + gidy* mag_step] && m >= mag[gidx + 1 + (gidy + 2) * mag_step])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- else
- {
- if(m > mag[gidx + 1 - s + gidy * mag_step] && m > mag[gidx + 1 + s + (gidy + 2) * mag_step])
- {
- edge_type = 1 + (int)(m > high_thresh);
- }
- }
- }
- map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
- }
-}
-
-// [256, 1, 1] threaded, local memory version
-__kernel
- void calcMap_3
- (
- __global const int * dx,
- __global const int * dy,
- __global const float * mag,
- __global int * map,
- int rows,
- int cols,
- float low_thresh,
- float high_thresh,
- int dx_step,
- int dx_offset,
- int dy_step,
- int dy_offset,
- int mag_step,
- int mag_offset,
- int map_step,
- int map_offset
- )
-{
- dx_step /= sizeof(*dx);
- dx_offset /= sizeof(*dx);
- dy_step /= sizeof(*dy);
- dy_offset /= sizeof(*dy);
- mag_step /= sizeof(*mag);
- mag_offset /= sizeof(*mag);
- map_step /= sizeof(*map);
- map_offset /= sizeof(*map);
-
- __local float smem[18][18];
-
- int lidx = get_local_id(0) % 16;
- int lidy = get_local_id(0) / 16;
-
- int grp_pix = get_global_id(0); // identifies which pixel is processing currently in the target block
- int grp_ind = get_global_id(1); // identifies which block of pixels is currently processing
-
- int grp_idx = (grp_ind % (cols/16)) * 16;
- int grp_idy = (grp_ind / (cols/16)) * 16; //(grp_ind / (cols/16)) * 16
-
- int gidx = grp_idx + lidx;
- int gidy = grp_idy + lidy;
-
- int tid = get_global_id(0) % 256;
- int lx = tid % 18;
- int ly = tid / 18;
- if(ly < 14)
- {
- smem[ly][lx] = mag[grp_idx + lx + (grp_idy + ly) * mag_step];
+ smem[ly][lx] =
+ mag[grp_idx + lx + min(grp_idy + ly, rows - 1) * mag_step];
}
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
{
- smem[ly + 14][lx] = mag[grp_idx + lx + (grp_idy + ly + 14) * mag_step];
+ smem[ly + 14][lx] =
+ mag[grp_idx + lx + min(grp_idy + ly + 14, rows -1) * mag_step];
}
barrier(CLK_LOCAL_MEM_FENCE);
// st the potiential edge points found in this kernel call
// counter the number of potiential edge points
__kernel
- void edgesHysteresisLocal
- (
+void
+__attribute__((reqd_work_group_size(16,16,1)))
+edgesHysteresisLocal
+(
__global int * map,
__global ushort2 * st,
volatile __global unsigned int * counter,
int cols,
int map_step,
int map_offset
- )
+)
{
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
int ly = tid / 18;
if(ly < 14)
{
- smem[ly][lx] = map[grp_idx + lx + (grp_idy + ly) * map_step + map_offset];
+ smem[ly][lx] =
+ map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
}
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
{
- smem[ly + 14][lx] = map[grp_idx + lx + (grp_idy + ly + 14) * map_step + map_offset];
+ smem[ly + 14][lx] =
+ map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
}
barrier(CLK_LOCAL_MEM_FENCE);
#define stack_size 512
__kernel
- void edgesHysteresisGlobal
- (
+void edgesHysteresisGlobal
+(
__global int * map,
__global ushort2 * st1,
__global ushort2 * st2,
int count,
int map_step,
int map_offset
- )
+)
{
map_step /= sizeof(*map);
while (s_counter > 0 && s_counter <= stack_size - get_local_size(0))
{
const int subTaskIdx = lidx >> 3;
- const int portion = min(s_counter, get_local_size(0)>> 3);
+ const int portion = min(s_counter, (uint)(get_local_size(0)>> 3));
pos.x = pos.y = 0;
// map edge type mappings
// dst edge output
__kernel
- void getEdges
- (
+void getEdges
+(
__global const int * map,
__global uchar * dst,
int rows,
int map_offset,
int dst_step,
int dst_offset
- )
+)
{
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
- //dst_step /= sizeof(*dst);
- //dst_offset /= sizeof(*dst);
int gidx = get_global_id(0);
int gidy = get_global_id(1);
if(gidy < rows && gidx < cols)
{
- //dst[gidx + gidy * dst_step] = map[gidx + 1 + (gidy + 1) * map_step] == 2 ? 255: 0;
- dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] / 2));
+ dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
}
}
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
#endif
#define LSIZE 256
#define LSIZE_1 255
#define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)
-kernel void integral_cols(__global uchar4 *src,__global int *sum ,__global float *sqsum,
+kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global float *sqsum,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
{
unsigned int lid = get_local_id(0);
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
- src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid]) : 0);
- src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + gid + 1]) : 0);
+ src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid, (uint)cols - 1)]) : 0);
+ src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, (uint)cols - 1)]) : 0);
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
- sqsum_t[0] = (i == 0 ? 0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+ sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
- sqsum_t[1] = (i == 0 ? 0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+ sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
}
barrier(CLK_LOCAL_MEM_FENCE);
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
- if(lid > 0 && (i+lid) <= rows){
+ if(lid > 0 && (i+lid) <= rows)
+ {
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
}
-kernel void integral_rows(__global int4 *srcsum,__global float4 * srcsqsum,__global int *sum ,
+kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__global int *sum ,
__global float *sqsum,int rows,int cols,int src_step,int sum_step,
int sqsum_step,int sum_offset,int sqsum_offset)
{
src_step = src_step >> 4;
for(int i = 0; i < rows; i =i + LSIZE_1)
{
- src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : 0;
- sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : 0;
- src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : 0;
- sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : 0;
+ src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (int4)0;
+ sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : (float4)0;
+ src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (int4)0;
+ sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
- sqsum_t[0] = (i == 0 ? 0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+ sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
- sqsum_t[1] = (i == 0 ? 0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+ sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
- sum[sum_offset + i + lid] = 0;
- sqsum[sqsum_offset + i + lid] = 0;
+ sum[sum_offset + i + lid] = 0;
+ sqsum[sqsum_offset + i + lid] = 0;
}
if(i + lid == 0)
{
int loc0 = gid * 2 * sum_step;
int loc1 = gid * 2 * sqsum_step;
- for(int k = 1;k <= 8;k++)
+ for(int k = 1; k <= 8; k++)
{
if(gid * 8 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
}
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
- if(lid > 0 && (i+lid) <= rows){
+ if(lid > 0 && (i+lid) <= rows)
+ {
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
lm_sqsum[0][bf_loc] += sqsum_t[0];
barrier(CLK_LOCAL_MEM_FENCE);
}
}
+
+// Column-wise pass of the CV_32F ("D5") integral image: each work-group
+// scans two adjacent uchar4 source columns, producing per-column running
+// sums and squared sums.  The scan is a two-phase (up-sweep / down-sweep)
+// exclusive prefix sum in local memory; GET_CONFLICT_OFFSET adds index
+// padding, presumably to avoid local-memory bank conflicts.
+// NOTE(review): assumes src_step is given in bytes and that the work-group
+// size matches LSIZE (256) -- confirm against the host-side enqueue code.
+kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global float *sqsum,
+                             int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
+{
+    unsigned int lid = get_local_id(0);
+    unsigned int gid = get_group_id(0);
+    float4 src_t[2], sum_t[2];
+    float4 sqsum_t[2];
+    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
+    __local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
+    __local float* sum_p;
+    __local float* sqsum_p;
+    src_step = src_step >> 2;      // byte step -> uchar4 element step
+    gid = gid << 1;                // each group covers two adjacent columns
+    // Process the rows in chunks of LSIZE_1 work-items, carrying each
+    // chunk's running total (sum_t / sqsum_t) into the next chunk.
+    for(int i = 0; i < rows; i =i + LSIZE_1)
+    {
+        // Load one pixel per work-item for each of the two columns;
+        // out-of-range rows contribute zero, and the column index is
+        // clamped to cols-1.
+        src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid, (uint)cols - 1)]) : (float4)0);
+        src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, (uint)cols - 1)]) : (float4)0);
+
+        // Carry over the previous chunk's total, stored at the top of the
+        // scan buffer; zero on the first chunk.
+        sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+        sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);   // padded local index
+        lm_sum[0][bf_loc] = src_t[0];
+        lm_sqsum[0][bf_loc] = convert_float4(src_t[0] * src_t[0]);
+
+        lm_sum[1][bf_loc] = src_t[1];
+        lm_sqsum[1][bf_loc] = convert_float4(src_t[1] * src_t[1]);
+
+        // Up-sweep (reduce) phase of the scan; the two halves of the
+        // work-group (selected by lid >> 7) handle the two columns.
+        int offset = 1;
+        for(int d = LSIZE >> 1 ; d > 0; d>>=1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
+            }
+            offset <<= 1;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lid < 2)
+        {
+            // Clear the top element so the down-sweep yields an exclusive scan.
+            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
+            lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
+        }
+        // Down-sweep phase: push partial sums back down the reduction tree.
+        for(int d = 1; d < LSIZE; d <<= 1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            offset >>= 1;
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
+
+                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
+                lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
+        if(lid > 0 && (i+lid) <= rows)
+        {
+            // Add the carry from the previous chunk, then scatter the four
+            // lanes of each float4 to the output, skipping destination
+            // columns outside [pre_invalid, cols + pre_invalid).
+            lm_sum[0][bf_loc] += sum_t[0];
+            lm_sum[1][bf_loc] += sum_t[1];
+            lm_sqsum[0][bf_loc] += sqsum_t[0];
+            lm_sqsum[1][bf_loc] += sqsum_t[1];
+            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
+            sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
+                sum[loc_s0 + k * dst_step / 4] = sum_p[k];
+                sqsum[loc_s0 + k * dst_step / 4] = sqsum_p[k];
+            }
+            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
+            sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 4 + k + 4 >= cols + pre_invalid) break;
+                sum[loc_s1 + k * dst_step / 4] = sum_p[k];
+                sqsum[loc_s1 + k * dst_step / 4] = sqsum_p[k];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
+
+
+// Row-wise pass of the CV_32F ("D5") integral image: consumes the
+// column-scanned partial sums (srcsum) and squared sums (srcsqsum) produced
+// by integral_cols_D5 and scans them along the other axis, writing the final
+// sum/sqsum integral buffers.  Same up-sweep / down-sweep exclusive prefix
+// scan in local memory as the column pass; additionally zeroes the border
+// elements of the output (the gid==0 and i+lid==0 branches below).
+kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,__global float *sum ,
+                             __global float *sqsum,int rows,int cols,int src_step,int sum_step,
+                             int sqsum_step,int sum_offset,int sqsum_offset)
+{
+    unsigned int lid = get_local_id(0);
+    unsigned int gid = get_group_id(0);
+    float4 src_t[2], sum_t[2];
+    float4 sqsrc_t[2],sqsum_t[2];
+    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
+    __local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
+    __local float *sum_p;
+    __local float *sqsum_p;
+    src_step = src_step >> 4;      // byte step -> float4 element step
+    // Scan in chunks of LSIZE_1, carrying each chunk's total into the next.
+    for(int i = 0; i < rows; i =i + LSIZE_1)
+    {
+        // Two float4 elements per work-item (two output columns per group);
+        // out-of-range rows contribute zero.
+        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
+        sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : (float4)0;
+        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
+        sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
+
+        // Carry from the previous chunk (top of the scan buffer).
+        sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+        sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[1] = (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);   // padded local index
+        lm_sum[0][bf_loc] = src_t[0];
+        lm_sqsum[0][bf_loc] = sqsrc_t[0];
+
+        lm_sum[1][bf_loc] = src_t[1];
+        lm_sqsum[1][bf_loc] = sqsrc_t[1];
+
+        // Up-sweep (reduce) phase.
+        int offset = 1;
+        for(int d = LSIZE >> 1 ; d > 0; d>>=1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
+            }
+            offset <<= 1;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lid < 2)
+        {
+            // Clear top elements so the down-sweep yields an exclusive scan.
+            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
+            lm_sqsum[lid][LSIZE_2 + LOG_LSIZE] = 0;
+        }
+        // Down-sweep phase.
+        for(int d = 1; d < LSIZE; d <<= 1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            offset >>= 1;
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
+
+                lm_sqsum[lid >> 7][bi] += lm_sqsum[lid >> 7][ai];
+                lm_sqsum[lid >> 7][ai] = lm_sqsum[lid >> 7][bi] - lm_sqsum[lid >> 7][ai];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(gid == 0 && (i + lid) <= rows)
+        {
+            // First group zeroes the leading border element of each output row.
+            sum[sum_offset + i + lid] = 0;
+            sqsum[sqsum_offset + i + lid] = 0;
+        }
+        if(i + lid == 0)
+        {
+            // First work-item of the first chunk zeroes this group's slice of
+            // the top border row (up to 8 columns per group).
+            int loc0 = gid * 2 * sum_step;
+            int loc1 = gid * 2 * sqsum_step;
+            for(int k = 1; k <= 8; k++)
+            {
+                if(gid * 8 + k > cols) break;
+                sum[sum_offset + loc0 + k * sum_step / 4] = 0;
+                sqsum[sqsum_offset + loc1 + k * sqsum_step / 4] = 0;
+            }
+        }
+        int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
+        int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
+        if(lid > 0 && (i+lid) <= rows)
+        {
+            // Add the carry, then scatter the four lanes of each float4 to
+            // the final sum/sqsum buffers, stopping at the column count.
+            lm_sum[0][bf_loc] += sum_t[0];
+            lm_sum[1][bf_loc] += sum_t[1];
+            lm_sqsum[0][bf_loc] += sqsum_t[0];
+            lm_sqsum[1][bf_loc] += sqsum_t[1];
+            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
+            sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 8 + k >= cols) break;
+                sum[loc_s0 + k * sum_step / 4] = sum_p[k];
+                sqsum[loc_sq0 + k * sqsum_step / 4] = sqsum_p[k];
+            }
+            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
+            sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 8 + 4 + k >= cols) break;
+                sum[loc_s1 + k * sum_step / 4] = sum_p[k];
+                sqsum[loc_sq1 + k * sqsum_step / 4] = sqsum_p[k];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
\ No newline at end of file
//M*/
#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
+#endif
+
#define LSIZE 256
#define LSIZE_1 255
#define LSIZE_2 254
#define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)
-kernel void integral_sum_cols(__global uchar4 *src,__global int *sum ,
- int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
+kernel void integral_sum_cols_D4(__global uchar4 *src,__global int *sum ,
+ int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
- if(lid > 0 && (i+lid) <= rows){
+ if(lid > 0 && (i+lid) <= rows)
+ {
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
}
-kernel void integral_sum_rows(__global int4 *srcsum,__global int *sum ,
- int rows,int cols,int src_step,int sum_step,
- int sum_offset)
+kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
+ int rows,int cols,int src_step,int sum_step,
+ int sum_offset)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
barrier(CLK_LOCAL_MEM_FENCE);
if(gid == 0 && (i + lid) <= rows)
{
- sum[sum_offset + i + lid] = 0;
+ sum[sum_offset + i + lid] = 0;
}
if(i + lid == 0)
{
int loc0 = gid * 2 * sum_step;
- for(int k = 1;k <= 8;k++)
+ for(int k = 1; k <= 8; k++)
{
if(gid * 8 + k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
}
}
- if(lid > 0 && (i+lid) <= rows){
+ if(lid > 0 && (i+lid) <= rows)
+ {
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[1][bf_loc] += sum_t[1];
barrier(CLK_LOCAL_MEM_FENCE);
}
}
+
+// Sum-only variant of the column pass for the CV_32F ("D5") integral image:
+// identical scan structure to integral_cols_D5 but computes only the running
+// sums (no squared sums).
+// NOTE(review): unlike integral_cols_D5, the source column index is NOT
+// clamped with min(..., cols-1); verify that gid+1 cannot read past the
+// allocated source width for odd column counts.
+kernel void integral_sum_cols_D5(__global uchar4 *src,__global float *sum ,
+                                 int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
+{
+    unsigned int lid = get_local_id(0);
+    unsigned int gid = get_group_id(0);
+    float4 src_t[2], sum_t[2];
+    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
+    __local float* sum_p;
+    src_step = src_step >> 2;      // byte step -> uchar4 element step
+    gid = gid << 1;                // each group covers two adjacent columns
+    // Scan the rows in chunks of LSIZE_1, carrying each chunk's total.
+    for(int i = 0; i < rows; i =i + LSIZE_1)
+    {
+        // One pixel per work-item per column; out-of-range rows read zero.
+        src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid]) : (float4)0);
+        src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + gid + 1]) : (float4)0);
+
+        // Carry from the previous chunk (top of the scan buffer).
+        sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
+        sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);   // padded local index
+        lm_sum[0][bf_loc] = src_t[0];
+
+        lm_sum[1][bf_loc] = src_t[1];
+
+        // Up-sweep (reduce) phase of the prefix scan.
+        int offset = 1;
+        for(int d = LSIZE >> 1 ; d > 0; d>>=1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+            }
+            offset <<= 1;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lid < 2)
+        {
+            // Clear the top element so the down-sweep yields an exclusive scan.
+            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
+        }
+        // Down-sweep phase.
+        for(int d = 1; d < LSIZE; d <<= 1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            offset >>= 1;
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lid > 0 && (i+lid) <= rows)
+        {
+            // Add the carry, then scatter the four lanes of each float4 to
+            // the output, skipping columns outside [pre_invalid, cols+pre_invalid).
+            int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
+            lm_sum[0][bf_loc] += sum_t[0];
+            lm_sum[1][bf_loc] += sum_t[1];
+            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
+                sum[loc_s0 + k * dst_step / 4] = sum_p[k];
+            }
+            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 4 + k + 4 >= cols + pre_invalid) break;
+                sum[loc_s1 + k * dst_step / 4] = sum_p[k];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
+
+
+// Sum-only variant of the row pass for the CV_32F ("D5") integral image:
+// consumes the column-scanned partial sums from integral_sum_cols_D5 and
+// scans along the other axis, writing the final sum buffer and zeroing the
+// output's border elements.  Same up-sweep / down-sweep exclusive prefix
+// scan structure as integral_rows_D5, without the sqsum channel.
+kernel void integral_sum_rows_D5(__global float4 *srcsum,__global float *sum ,
+                                 int rows,int cols,int src_step,int sum_step,
+                                 int sum_offset)
+{
+    unsigned int lid = get_local_id(0);
+    unsigned int gid = get_group_id(0);
+    float4 src_t[2], sum_t[2];
+    __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
+    __local float *sum_p;
+    src_step = src_step >> 4;      // byte step -> float4 element step
+    // Scan in chunks of LSIZE_1, carrying each chunk's total into the next.
+    for(int i = 0; i < rows; i =i + LSIZE_1)
+    {
+        // Two float4 elements per work-item; out-of-range rows read zero.
+        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
+        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
+
+        // Carry from the previous chunk (top of the scan buffer).
+        sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
+        sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int bf_loc = lid + GET_CONFLICT_OFFSET(lid);   // padded local index
+        lm_sum[0][bf_loc] = src_t[0];
+
+        lm_sum[1][bf_loc] = src_t[1];
+
+        // Up-sweep (reduce) phase.
+        int offset = 1;
+        for(int d = LSIZE >> 1 ; d > 0; d>>=1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+            }
+            offset <<= 1;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(lid < 2)
+        {
+            // Clear the top element so the down-sweep yields an exclusive scan.
+            lm_sum[lid][LSIZE_2 + LOG_LSIZE] = 0;
+        }
+        // Down-sweep phase.
+        for(int d = 1; d < LSIZE; d <<= 1)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            offset >>= 1;
+            int ai = offset * (((lid & 127)<<1) +1) - 1,bi = ai + offset;
+            ai += GET_CONFLICT_OFFSET(ai);
+            bi += GET_CONFLICT_OFFSET(bi);
+
+            if((lid & 127) < d)
+            {
+                lm_sum[lid >> 7][bi] += lm_sum[lid >> 7][ai];
+                lm_sum[lid >> 7][ai] = lm_sum[lid >> 7][bi] - lm_sum[lid >> 7][ai];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if(gid == 0 && (i + lid) <= rows)
+        {
+            // First group zeroes the leading border element of each output row.
+            sum[sum_offset + i + lid] = 0;
+        }
+        if(i + lid == 0)
+        {
+            // First work-item of the first chunk zeroes this group's slice of
+            // the top border row (up to 8 columns per group).
+            int loc0 = gid * 2 * sum_step;
+            for(int k = 1; k <= 8; k++)
+            {
+                if(gid * 8 + k > cols) break;
+                sum[sum_offset + loc0 + k * sum_step / 4] = 0;
+            }
+        }
+
+        if(lid > 0 && (i+lid) <= rows)
+        {
+            // Add the carry, then scatter the four lanes of each float4 to
+            // the final sum buffer, stopping at the column count.
+            int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
+            lm_sum[0][bf_loc] += sum_t[0];
+            lm_sum[1][bf_loc] += sum_t[1];
+            sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 8 + k >= cols) break;
+                sum[loc_s0 + k * sum_step / 4] = sum_p[k];
+            }
+            sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
+            for(int k = 0; k < 4; k++)
+            {
+                if(gid * 8 + 4 + k >= cols) break;
+                sum[loc_s1 + k * sum_step / 4] = sum_p[k];
+            }
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+}
//warpAffine kernel
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
-#if defined DOUBLE_SUPPORT
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
typedef double F;
typedef double4 F4;
#define convert_F4 convert_double4
#define convert_F4 convert_float4
#endif
-
#define INTER_BITS 5
#define INTER_TAB_SIZE (1 << INTER_BITS)
#define INTER_SCALE 1.f/INTER_TAB_SIZE
/**********************************************8UC1*********************************************
***********************************************************************************************/
__kernel void warpAffineNN_C1_D0(__global uchar const * restrict src, __global uchar * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
sval.s1 = scon.s1 ? src[spos.s1] : 0;
sval.s2 = scon.s2 ? src[spos.s2] : 0;
sval.s3 = scon.s3 ? src[spos.s3] : 0;
- dval = convert_uchar4(dcon != 0) ? sval : dval;
+ dval = convert_uchar4(dcon) != (uchar4)(0,0,0,0) ? sval : dval;
*d = dval;
}
}
__kernel void warpAffineLinear_C1_D0(__global const uchar * restrict src, __global uchar * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
spos1 = src_offset + sy * srcStep + sx + 1;
spos2 = src_offset + (sy+1) * srcStep + sx;
spos3 = src_offset + (sy+1) * srcStep + sx + 1;
-
+
v0.s0 = scon0.s0 ? src[spos0.s0] : 0;
v1.s0 = scon1.s0 ? src[spos1.s0] : 0;
v2.s0 = scon2.s0 ? src[spos2.s0] : 0;
v1.s3 = scon1.s3 ? src[spos1.s3] : 0;
v2.s3 = scon2.s3 ? src[spos2.s3] : 0;
v3.s3 = scon3.s3 ? src[spos3.s3] : 0;
-
+
short4 itab0, itab1, itab2, itab3;
float4 taby, tabx;
taby = INTER_SCALE * convert_float4(ay);
tabx = INTER_SCALE * convert_float4(ax);
- itab0 = convert_short4_sat(( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ));
- itab1 = convert_short4_sat(( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE ));
- itab2 = convert_short4_sat(( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ));
- itab3 = convert_short4_sat(( taby*tabx * INTER_REMAP_COEF_SCALE ));
+ itab0 = convert_short4_sat(( (1.0f-taby)*(1.0f-tabx) * (float4)INTER_REMAP_COEF_SCALE ));
+ itab1 = convert_short4_sat(( (1.0f-taby)*tabx * (float4)INTER_REMAP_COEF_SCALE ));
+ itab2 = convert_short4_sat(( taby*(1.0f-tabx) * (float4)INTER_REMAP_COEF_SCALE ));
+ itab3 = convert_short4_sat(( taby*tabx * (float4)INTER_REMAP_COEF_SCALE ));
int4 val;
uchar4 tval;
val = convert_int4(v0) * convert_int4(itab0) + convert_int4(v1) * convert_int4(itab1)
- + convert_int4(v2) * convert_int4(itab2) + convert_int4(v3) * convert_int4(itab3);
+ + convert_int4(v2) * convert_int4(itab2) + convert_int4(v3) * convert_int4(itab3);
tval = convert_uchar4_sat ( (val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ) ;
__global uchar4 * d =(__global uchar4 *)(dst+dst_offset+dy*dstStep+dx);
}
__kernel void warpAffineCubic_C1_D0(__global uchar * src, __global uchar * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
#pragma unroll 4
for(i=0; i<4; i++)
- for(j=0; j<4; j++)
- {
- v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : 0;
- }
+ for(j=0; j<4; j++)
+ {
+ v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : 0;
+ }
short itab[16];
float tab1y[4], tab1x[4];
if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
mk1 = k1, mk2 = k2;
else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
- Mk1 = k1, Mk2 = k2;
+ Mk1 = k1, Mk2 = k2;
}
diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
}
***********************************************************************************************/
__kernel void warpAffineNN_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
}
__kernel void warpAffineLinear_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
}
__kernel void warpAffineCubic_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int i,j;
#pragma unroll 4
for(i=0; i<4; i++)
- for(j=0; j<4; j++)
- {
- v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)]) : (uchar4)0;
- }
+ for(j=0; j<4; j++)
+ {
+ v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)]) : (uchar4)0;
+ }
int itab[16];
float tab1y[4], tab1x[4];
float axx, ayy;
int diff = isum - INTER_REMAP_COEF_SCALE;
int Mk1=2, Mk2=2, mk1=2, mk2=2;
- for( k1 = 2; k1 < 4; k1++ )
+ for( k1 = 2; k1 < 4; k1++ )
for( k2 = 2; k2 < 4; k2++ )
{
if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
mk1 = k1, mk2 = k2;
else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
- Mk1 = k1, Mk2 = k2;
+ Mk1 = k1, Mk2 = k2;
}
diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
***********************************************************************************************/
__kernel void warpAffineNN_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
}
__kernel void warpAffineLinear_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
sum += v0 * tab[0] + v1 * tab[1] + v2 * tab[2] + v3 * tab[3];
if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
dst[(dst_offset>>2)+dy*dstStep+dx] = sum;
- }
+ }
}
__kernel void warpAffineCubic_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
***********************************************************************************************/
__kernel void warpAffineNN_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
short sy0 = (short)(Y0 >> AB_BITS);
if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
- dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx0>=0 && sx0<src_cols && sy0>=0 && sy0<src_rows) ? src[(src_offset>>4)+sy0*(srcStep>>2)+sx0] : 0;
+ dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx0>=0 && sx0<src_cols && sy0>=0 && sy0<src_rows) ? src[(src_offset>>4)+sy0*(srcStep>>2)+sx0] : (float4)0;
}
}
__kernel void warpAffineLinear_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
float4 v0, v1, v2, v3;
- v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : 0;
- v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : 0;
- v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : 0;
- v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : 0;
+ v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : (float4)0;
+ v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : (float4)0;
+ v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : (float4)0;
+ v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : (float4)0;
float tab[4];
float taby[2], tabx[2];
sum += v0 * tab[0] + v1 * tab[1] + v2 * tab[2] + v3 * tab[3];
if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
dst[dst_offset+dy*dstStep+dx] = sum;
- }
+ }
}
__kernel void warpAffineCubic_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int i;
for(i=0; i<16; i++)
- v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : 0;
+ v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float4)0;
float tab[16];
float tab1y[4], tab1x[4];
dst[dst_offset+dy*dstStep+dx] = sum;
}
- }
+ }
}
//wrapPerspective kernel
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
-#if defined DOUBLE_SUPPORT
+#if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
typedef double F;
typedef double4 F4;
#define convert_F4 convert_double4
/**********************************************8UC1*********************************************
***********************************************************************************************/
__kernel void warpPerspectiveNN_C1_D0(__global uchar const * restrict src, __global uchar * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
sval.s1 = scon.s1 ? src[spos.s1] : 0;
sval.s2 = scon.s2 ? src[spos.s2] : 0;
sval.s3 = scon.s3 ? src[spos.s3] : 0;
- dval = convert_uchar4(dcon != 0) ? sval : dval;
+ dval = convert_uchar4(dcon) != (uchar4)(0,0,0,0) ? sval : dval;
*d = dval;
}
}
__kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, __global uchar * dst,
- int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
- int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
+ int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int i;
#pragma unroll 4
for(i=0; i<4; i++)
- v[i] = (sx+(i&1) >= 0 && sx+(i&1) < src_cols && sy+(i>>1) >= 0 && sy+(i>>1) < src_rows) ? src[src_offset + (sy+(i>>1)) * srcStep + (sx+(i&1))] : 0;
+ v[i] = (sx+(i&1) >= 0 && sx+(i&1) < src_cols && sy+(i>>1) >= 0 && sy+(i>>1) < src_rows) ? src[src_offset + (sy+(i>>1)) * srcStep + (sx+(i&1))] : (uchar)0;
short itab[4];
float tab1y[2], tab1x[2];
}
__kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
short ay = (short)(Y & (INTER_TAB_SIZE-1));
short ax = (short)(X & (INTER_TAB_SIZE-1));
- uchar v[16];
+ uchar v[16];
int i, j;
#pragma unroll 4
for(i=0; i<4; i++)
- for(j=0; j<4; j++)
- {
- v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : 0;
- }
+ for(j=0; j<4; j++)
+ {
+ v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : (uchar)0;
+ }
short itab[16];
float tab1y[4], tab1x[4];
if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
mk1 = k1, mk2 = k2;
else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
- Mk1 = k1, Mk2 = k2;
+ Mk1 = k1, Mk2 = k2;
}
diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
}
***********************************************************************************************/
__kernel void warpPerspectiveNN_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst,
- int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
- int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
+ int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
}
__kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst,
- int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
- int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
+ int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int4 v0, v1, v2, v3;
- v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx]) : 0;
- v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx+1]) : 0;
- v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx]) : 0;
- v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx+1]) : 0;
+ v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx]) : (int4)0;
+ v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx+1]) : (int4)0;
+ v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx]) : (int4)0;
+ v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx+1]) : (int4)0;
int itab0, itab1, itab2, itab3;
float taby, tabx;
}
__kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst,
- int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
- int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
+ int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int i,j;
#pragma unroll 4
for(i=0; i<4; i++)
- for(j=0; j<4; j++)
- {
- v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)]) : (uchar4)0;
- }
+ for(j=0; j<4; j++)
+ {
+ v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)]) : (uchar4)0;
+ }
int itab[16];
float tab1y[4], tab1x[4];
float axx, ayy;
int diff = isum - INTER_REMAP_COEF_SCALE;
int Mk1=2, Mk2=2, mk1=2, mk2=2;
- for( k1 = 2; k1 < 4; k1++ )
+ for( k1 = 2; k1 < 4; k1++ )
for( k2 = 2; k2 < 4; k2++ )
{
if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] )
mk1 = k1, mk2 = k2;
else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] )
- Mk1 = k1, Mk2 = k2;
+ Mk1 = k1, Mk2 = k2;
}
diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff));
***********************************************************************************************/
__kernel void warpPerspectiveNN_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
}
__kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
float v0, v1, v2, v3;
- v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx] : 0;
- v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx+1] : 0;
- v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx] : 0;
- v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx+1] : 0;
+ v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx] : (float)0;
+ v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx+1] : (float)0;
+ v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx] : (float)0;
+ v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx+1] : (float)0;
float tab[4];
float taby[2], tabx[2];
}
__kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int i;
for(i=0; i<16; i++)
- v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : 0;
+ v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float)0;
float tab[16];
float tab1y[4], tab1x[4];
***********************************************************************************************/
__kernel void warpPerspectiveNN_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
short sy = (short)Y;
if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows)
- dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx>=0 && sx<src_cols && sy>=0 && sy<src_rows) ? src[(src_offset>>4)+sy*(srcStep>>2)+sx] : 0;
+ dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx>=0 && sx<src_cols && sy>=0 && sy<src_rows) ? src[(src_offset>>4)+sy*(srcStep>>2)+sx] : (float)0;
}
}
__kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows,
- int dst_cols, int dst_rows, int srcStep, int dstStep,
- int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int dst_cols, int dst_rows, int srcStep, int dstStep,
+ int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
float4 v0, v1, v2, v3;
- v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : 0;
- v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : 0;
- v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : 0;
- v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : 0;
+ v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : (float4)0;
+ v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : (float4)0;
+ v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : (float4)0;
+ v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : (float4)0;
float tab[4];
float taby[2], tabx[2];
}
__kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4 * dst,
- int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
- int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
+ int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep,
+ int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols )
{
int dx = get_global_id(0);
int dy = get_global_id(1);
int i;
for(i=0; i<16; i++)
- v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : 0;
+ v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float4)0;
float tab[16];
float tab1y[4], tab1x[4];
dst[dst_offset+dy*dstStep+dx] = sum;
}
- }
+ }
}
+
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset);
for(j = 0; j < tpl_cols; j ++)
{
- sum = mad24(img_ptr[j], tpl_ptr[j], sum);
+ sum = mad24(convert_int(img_ptr[j]), convert_int(tpl_ptr[j]), sum);
}
}
- res[res_idx] = sum;
+ res[res_idx] = (float)sum;
}
}
sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum);
}
}
- res[res_idx] = sum.x + sum.y + sum.z + sum.w;
+ res[res_idx] = (float)(sum.x + sum.y + sum.z + sum.w);
}
}
if(gidx < res_cols && gidy < res_rows)
{
- float sum = (float)(
- (img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
- - (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
+ float sum = (float)((img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)])
+ -(img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)]));
res[res_idx] -= sum * tpl_sum;
}
}
//----------------------------------------------------------------------------
// Histogram computation
-
-__kernel void compute_hists_kernel(const int width, const int cblock_stride_x, const int cblock_stride_y,
- const int cnbins, const int cblock_hist_size, const int img_block_width,
- const int grad_quadstep, const int qangle_step,
- __global const float* grad, __global const uchar* qangle,
- const float scale, __global float* block_hists, __local float* smem)
+// 12 threads for a cell, 12x4 threads per block
+__kernel void compute_hists_kernel(
+ const int cblock_stride_x, const int cblock_stride_y,
+ const int cnbins, const int cblock_hist_size, const int img_block_width,
+ const int blocks_in_group, const int blocks_total,
+ const int grad_quadstep, const int qangle_step,
+ __global const float* grad, __global const uchar* qangle,
+ const float scale, __global float* block_hists, __local float* smem)
{
- const int lidX = get_local_id(0);
+ const int lx = get_local_id(0);
+ const int lp = lx / 24; /* local group id */
+ const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */
+ const int gidY = gid / img_block_width;
+ const int gidX = gid - gidY * img_block_width;
+
+ const int lidX = lx - lp * 24;
const int lidY = get_local_id(1);
- const int gidX = get_group_id(0);
- const int gidY = get_group_id(1);
- const int cell_x = lidX / 16;
+ const int cell_x = lidX / 12;
const int cell_y = lidY;
- const int cell_thread_x = lidX & 0xF;
+ const int cell_thread_x = lidX - cell_x * 12;
- __local float* hists = smem;
- __local float* final_hist = smem + cnbins * 48;
+ __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
+ CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y);
+ __local float* final_hist = hists + cnbins *
+ (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12);
const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
const int offset_y = gidY * cblock_stride_y + (cell_y << 2);
- __global const float* grad_ptr = grad + offset_y * grad_quadstep + (offset_x << 1);
- __global const uchar* qangle_ptr = qangle + offset_y * qangle_step + (offset_x << 1);
-
- // 12 means that 12 pixels affect on block's cell (in one row)
- if (cell_thread_x < 12)
- {
- __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + cell_thread_x;
- for (int bin_id = 0; bin_id < cnbins; ++bin_id)
- hist[bin_id * 48] = 0.f;
+ __global const float* grad_ptr = (gid < blocks_total) ?
+ grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
+ __global const uchar* qangle_ptr = (gid < blocks_total) ?
+ qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
- const int dist_x = -4 + cell_thread_x - 4 * cell_x;
+ __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
+ cell_thread_x;
+ for (int bin_id = 0; bin_id < cnbins; ++bin_id)
+ hist[bin_id * 48] = 0.f;
- const int dist_y_begin = -4 - 4 * lidY;
- for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
- {
- float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
- uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
+ const int dist_x = -4 + cell_thread_x - 4 * cell_x;
+ const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
- grad_ptr += grad_quadstep;
- qangle_ptr += qangle_step;
+ const int dist_y_begin = -4 - 4 * lidY;
+ for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
+ {
+ float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
+ uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
- int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
- int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
+ grad_ptr += grad_quadstep;
+ qangle_ptr += qangle_step;
- float gaussian = exp(-(dist_center_y * dist_center_y + dist_center_x * dist_center_x) * scale);
- float interp_weight = (8.f - fabs(dist_y + 0.5f)) * (8.f - fabs(dist_x + 0.5f)) / 64.f;
+ int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
- hist[bin.x * 48] += gaussian * interp_weight * vote.x;
- hist[bin.y * 48] += gaussian * interp_weight * vote.y;
- }
+ float gaussian = exp(-(dist_center_y * dist_center_y + dist_center_x *
+ dist_center_x) * scale);
+ float interp_weight = (8.f - fabs(dist_y + 0.5f)) *
+ (8.f - fabs(dist_x + 0.5f)) / 64.f;
- volatile __local float* hist_ = hist;
- for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
- {
- if (cell_thread_x < 6) hist_[0] += hist_[6];
- if (cell_thread_x < 3) hist_[0] += hist_[3];
- if (cell_thread_x == 0)
- final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2];
- }
+ hist[bin.x * 48] += gaussian * interp_weight * vote.x;
+ hist[bin.y * 48] += gaussian * interp_weight * vote.y;
}
-
barrier(CLK_LOCAL_MEM_FENCE);
- __global float* block_hist = block_hists + (gidY * img_block_width + gidX) * cblock_hist_size;
+ volatile __local float* hist_ = hist;
+ for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
+ {
+ if (cell_thread_x < 6)
+ hist_[0] += hist_[6];
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (cell_thread_x < 3)
+ hist_[0] += hist_[3];
+#ifdef WAVE_SIZE_1
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+ if (cell_thread_x == 0)
+ final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
+ hist_[0] + hist_[1] + hist_[2];
+ }
+#ifdef WAVE_SIZE_1
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
- int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x;
- if (tid < cblock_hist_size)
+ int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
+ if ((tid < cblock_hist_size) && (gid < blocks_total))
+ {
+ __global float* block_hist = block_hists +
+ (gidY * img_block_width + gidX) * cblock_hist_size;
block_hist[tid] = final_hist[tid];
+ }
}
//-------------------------------------------------------------
unsigned int tid = get_local_id(0);
float sum = smem[tid];
- if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; barrier(CLK_LOCAL_MEM_FENCE); }
- if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; barrier(CLK_LOCAL_MEM_FENCE); }
- if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; barrier(CLK_LOCAL_MEM_FENCE); }
+ if (size >= 512)
+ {
+ if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ if (size >= 256)
+ {
+ if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ if (size >= 128)
+ {
+ if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
if (tid < 32)
{
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
+#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
+#endif
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 8)
+ {
+#endif
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 4)
+ {
+#endif
if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 2)
+ {
+#endif
if (size >= 4) smem[tid] = sum = sum + smem[tid + 2];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 1)
+ {
+#endif
if (size >= 2) smem[tid] = sum = sum + smem[tid + 1];
}
if (tid < 64) products[tid] = product = product + products[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
+ volatile __local float* smem = products;
if (tid < 32)
{
- volatile __local float* smem = products;
smem[tid] = product = product + smem[tid + 32];
+#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
- volatile __local float* smem = products;
+#endif
smem[tid] = product = product + smem[tid + 16];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 8)
+ {
+#endif
smem[tid] = product = product + smem[tid + 8];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 4)
+ {
+#endif
smem[tid] = product = product + smem[tid + 4];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 2)
+ {
+#endif
smem[tid] = product = product + smem[tid + 2];
+#ifdef WAVE_SIZE_1
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 1)
+ {
+#endif
smem[tid] = product = product + smem[tid + 1];
}
// Extract descriptors
__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width,
- const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
- __global const float* block_hists, __global float* descriptors)
+ const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
+ __global const float* block_hists, __global float* descriptors)
{
int tid = get_local_id(0);
int gidX = get_group_id(0);
}
__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
- const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x,
- const int win_block_stride_y, __global const float* block_hists, __global float* descriptors)
+ const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x,
+ const int win_block_stride_y, __global const float* block_hists, __global float* descriptors)
{
int tid = get_local_id(0);
int gidX = get_group_id(0);
// Gradients computation
__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
- const __global uchar4 * img, __global float * grad, __global uchar * qangle,
- const float angle_scale, const char correct_gamma, const int cnbins)
+ const __global uchar4 * img, __global float * grad, __global uchar * qangle,
+ const float angle_scale, const char correct_gamma, const int cnbins)
{
const int x = get_global_id(0);
const int tid = get_local_id(0);
}
__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
- __global const uchar * img, __global float * grad, __global uchar * qangle,
- const float angle_scale, const char correct_gamma, const int cnbins)
+ __global const uchar * img, __global float * grad, __global uchar * qangle,
+ const float angle_scale, const char correct_gamma, const int cnbins)
{
const int x = get_global_id(0);
const int tid = get_local_id(0);
}
#define BUFFER 64
+
+#ifdef CPU
+void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
+{
+ smem1[tid] = val1;
+ smem2[tid] = val2;
+ smem3[tid] = val3;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+#if BUFFER > 128
+ if (tid < 128)
+ {
+ smem1[tid] = val1 += smem1[tid + 128];
+ smem2[tid] = val2 += smem2[tid + 128];
+ smem3[tid] = val3 += smem3[tid + 128];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+#if BUFFER > 64
+ if (tid < 64)
+ {
+ smem1[tid] = val1 += smem1[tid + 64];
+ smem2[tid] = val2 += smem2[tid + 64];
+ smem3[tid] = val3 += smem3[tid + 64];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+ if (tid < 32)
+ {
+ smem1[tid] = val1 += smem1[tid + 32];
+ smem2[tid] = val2 += smem2[tid + 32];
+ smem3[tid] = val3 += smem3[tid + 32];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 16)
+ {
+ smem1[tid] = val1 += smem1[tid + 16];
+ smem2[tid] = val2 += smem2[tid + 16];
+ smem3[tid] = val3 += smem3[tid + 16];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 8)
+ {
+ smem1[tid] = val1 += smem1[tid + 8];
+ smem2[tid] = val2 += smem2[tid + 8];
+ smem3[tid] = val3 += smem3[tid + 8];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 4)
+ {
+ smem1[tid] = val1 += smem1[tid + 4];
+ smem2[tid] = val2 += smem2[tid + 4];
+ smem3[tid] = val3 += smem3[tid + 4];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 2)
+ {
+ smem1[tid] = val1 += smem1[tid + 2];
+ smem2[tid] = val2 += smem2[tid + 2];
+ smem3[tid] = val3 += smem3[tid + 2];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 1)
+ {
+ smem1[BUFFER] = val1 += smem1[tid + 1];
+ smem2[BUFFER] = val2 += smem2[tid + 1];
+ smem3[BUFFER] = val3 += smem3[tid + 1];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+}
+
+void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
+{
+ smem1[tid] = val1;
+ smem2[tid] = val2;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+#if BUFFER > 128
+ if (tid < 128)
+ {
+ smem1[tid] = (val1 += smem1[tid + 128]);
+ smem2[tid] = (val2 += smem2[tid + 128]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+#if BUFFER > 64
+ if (tid < 64)
+ {
+ smem1[tid] = (val1 += smem1[tid + 64]);
+ smem2[tid] = (val2 += smem2[tid + 64]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+ if (tid < 32)
+ {
+ smem1[tid] = (val1 += smem1[tid + 32]);
+ smem2[tid] = (val2 += smem2[tid + 32]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 16)
+ {
+ smem1[tid] = (val1 += smem1[tid + 16]);
+ smem2[tid] = (val2 += smem2[tid + 16]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 8)
+ {
+ smem1[tid] = (val1 += smem1[tid + 8]);
+ smem2[tid] = (val2 += smem2[tid + 8]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 4)
+ {
+ smem1[tid] = (val1 += smem1[tid + 4]);
+ smem2[tid] = (val2 += smem2[tid + 4]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 2)
+ {
+ smem1[tid] = (val1 += smem1[tid + 2]);
+ smem2[tid] = (val2 += smem2[tid + 2]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 1)
+ {
+ smem1[BUFFER] = (val1 += smem1[tid + 1]);
+ smem2[BUFFER] = (val2 += smem2[tid + 1]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+}
+
+void reduce1(float val1, volatile __local float* smem1, int tid)
+{
+ smem1[tid] = val1;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+#if BUFFER > 128
+ if (tid < 128)
+ {
+ smem1[tid] = (val1 += smem1[tid + 128]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+#if BUFFER > 64
+ if (tid < 64)
+ {
+ smem1[tid] = (val1 += smem1[tid + 64]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+ if (tid < 32)
+ {
+ smem1[tid] = (val1 += smem1[tid + 32]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 16)
+ {
+ smem1[tid] = (val1 += smem1[tid + 16]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 8)
+ {
+ smem1[tid] = (val1 += smem1[tid + 8]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 4)
+ {
+ smem1[tid] = (val1 += smem1[tid + 4]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 2)
+ {
+ smem1[tid] = (val1 += smem1[tid + 2]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (tid < 1)
+ {
+ smem1[BUFFER] = (val1 += smem1[tid + 1]);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+}
+#else
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
{
smem1[tid] = val1;
vmem1[tid] = val1 += vmem1[tid + 1];
}
}
+#endif
#define SCALE (1.0f / (1 << 20))
#define THRESHOLD 0.01f
*errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
}
-
+#define GRIDSIZE 3
__kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
+#ifdef CPU
+ __local float smem1[BUFFER+1];
+ __local float smem2[BUFFER+1];
+ __local float smem3[BUFFER+1];
+#else
__local float smem1[BUFFER];
__local float smem2[BUFFER];
__local float smem3[BUFFER];
+#endif
unsigned int xid=get_local_id(0);
unsigned int yid=get_local_id(1);
const int tid = mad24(yid, xsize, xid);
- float2 prevPt = prevPts[gid] / (1 << level);
+ float2 prevPt = prevPts[gid] / (float2)(1 << level);
if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)
{
float A12 = 0;
float A22 = 0;
- float I_patch[3][3];
- float dIdx_patch[3][3];
- float dIdy_patch[3][3];
+ float I_patch[GRIDSIZE][GRIDSIZE];
+ float dIdx_patch[GRIDSIZE][GRIDSIZE];
+ float dIdy_patch[GRIDSIZE][GRIDSIZE];
yBase=yid;
{
&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],
&A11, &A12, &A22);
}
+
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
+#ifdef CPU
+ A11 = smem1[BUFFER];
+ A12 = smem2[BUFFER];
+ A22 = smem3[BUFFER];
+#else
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
+#endif
float D = A11 * A22 - A12 * A12;
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
+#ifdef CPU
+ b1 = smem1[BUFFER];
+ b2 = smem2[BUFFER];
+#else
b1 = smem1[0];
b2 = smem2[0];
+#endif
float2 delta;
delta.x = A12 * b2 - A22 * b1;
nextPts[gid] = prevPt;
if (calcErr)
- err[gid] = smem1[0] / (c_winSize_x * c_winSize_y);
+#ifdef CPU
+ err[gid] = smem1[BUFFER] / (float)(c_winSize_x * c_winSize_y);
+#else
+ err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
+#endif
}
-
}
+
__kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
- __local float smem1[BUFFER];
- __local float smem2[BUFFER];
- __local float smem3[BUFFER];
+#ifdef CPU
+ __local float smem1[BUFFER+1];
+ __local float smem2[BUFFER+1];
+ __local float smem3[BUFFER+1];
+#else
+ __local float smem1[BUFFER];
+ __local float smem2[BUFFER];
+ __local float smem3[BUFFER];
+#endif
unsigned int xid=get_local_id(0);
unsigned int yid=get_local_id(1);
const int tid = mad24(yid, xsize, xid);
- float2 nextPt = prevPts[gid]/(1<<level);
+ float2 nextPt = prevPts[gid]/(float2)(1<<level);
if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows)
{
// extract the patch from the first image, compute covariation matrix of derivatives
- float A11 = 0;
- float A12 = 0;
- float A22 = 0;
+ float A11 = 0.0f;
+ float A12 = 0.0f;
+ float A22 = 0.0f;
float4 I_patch[8];
float4 dIdx_patch[8];
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
+#ifdef CPU
+ A11 = smem1[BUFFER];
+ A12 = smem2[BUFFER];
+ A22 = smem3[BUFFER];
+#else
A11 = smem1[0];
A12 = smem2[0];
A22 = smem3[0];
+#endif
float D = A11 * A22 - A12 * A12;
&b1, &b2);
}
-
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
+#ifdef CPU
+ b1 = smem1[BUFFER];
+ b2 = smem2[BUFFER];
+#else
b1 = smem1[0];
b2 = smem2[0];
+#endif
float2 delta;
delta.x = A12 * b2 - A22 * b1;
nextPts[gid] = nextPt;
if (calcErr)
- err[gid] = smem1[0] / (3 * c_winSize_x * c_winSize_y);
+#ifdef CPU
+ err[gid] = smem1[BUFFER] / (float)(3 * c_winSize_x * c_winSize_y);
+#else
+ err[gid] = smem1[0] / (float)(3 * c_winSize_x * c_winSize_y);
+#endif
}
}
volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0;
int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
- // int Y = get_group_id(1) * ROWSperTHREAD + radius;
+ // int Y = get_group_id(1) * ROWSperTHREAD + radius;
- #define Y (get_group_id(1) * ROWSperTHREAD + radius)
+#define Y (get_group_id(1) * ROWSperTHREAD + radius)
volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
__global unsigned char* disparImage = disp + X + Y * disp_step;
barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
+ uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && Y < cheight - radius)
{
- uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[0])
{
disparImage[0] = (unsigned char)(d + minSSD.y);
for(int row = 1; row < end_row; row++)
{
int idx1 = y_tex * img_step + x_tex;
- int idx2 = (y_tex + (2 * radius + 1)) * img_step + x_tex;
+ int idx2 = min(y_tex + (2 * radius + 1), cheight - 1) * img_step + x_tex;
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
+ uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && row < cheight - radius - Y)
{
int idx = row * cminSSD_step;
- uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[idx])
{
disparImage[disp_step * row] = (unsigned char)(d + minSSD.y);
int beg_row = group_id_y * RpT;
int end_row = min(beg_row + RpT, disp_rows);
- // if (x < disp_cols)
- // {
- int y = beg_row;
+// if (x < disp_cols)
+// {
+ int y = beg_row;
- float sum = 0;
- float sum_extra = 0;
+ float sum = 0;
+ float sum_extra = 0;
- for(int i = y - winsz2; i <= y + winsz2; ++i)
- {
- sum += sobel(input, x - winsz2, i, input_rows, input_cols);
- if (cols_extra)
- sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols);
- }
+ for(int i = y - winsz2; i <= y + winsz2; ++i)
+ {
+ sum += sobel(input, x - winsz2, i, input_rows, input_cols);
+ if (cols_extra)
+ sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols);
+ }
+ *cols = sum;
+ if (cols_extra)
+ *cols_extra = sum_extra;
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
+ if (sum_win < threshold)
+ disp[y * disp_step + x] = 0;
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ for(int y = beg_row + 1; y < end_row; ++y)
+ {
+ sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) +
+ sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
*cols = sum;
+
if (cols_extra)
+ {
+ sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols)
+ + sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
*cols_extra = sum_extra;
+ }
barrier(CLK_LOCAL_MEM_FENCE);
-
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold)
disp[y * disp_step + x] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
-
- for(int y = beg_row + 1; y < end_row; ++y)
- {
- sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) +
- sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
- *cols = sum;
-
- if (cols_extra)
- {
- sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols)
- + sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
- *cols_extra = sum_extra;
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
- float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
- if (sum_win < threshold)
- disp[y * disp_step + x] = 0;
-
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- // }
+ }
+ // }
}
for (int d = 0; d < cndisp; ++d)
{
float dst_reg;
- dst_reg = src[(d * src_rows + (2*y+0)) * src_step + 2*x+0];
- dst_reg += src[(d * src_rows + (2*y+1)) * src_step + 2*x+0];
- dst_reg += src[(d * src_rows + (2*y+0)) * src_step + 2*x+1];
- dst_reg += src[(d * src_rows + (2*y+1)) * src_step + 2*x+1];
+ dst_reg = src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+0];
+ dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+0];
+ dst_reg += src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+1];
+ dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+1];
dst[(d * dst_rows + y) * dst_step + x] = saturate_cast(dst_reg);
}
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
-// Yao Wang, yao@multicorewareinc.com
+// Yao Wang, bitwangyaoyao@gmail.com
// Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
#include "precomp.hpp"
+
using namespace std;
using namespace cv;
using namespace cv::ocl;
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *pyrlk;
extern const char *pyrlk_no_image;
-extern const char *operator_setTo;
-extern const char *operator_convertTo;
-extern const char *operator_copyToM;
extern const char *arithm_mul;
-extern const char *pyr_down;
}
}
}
}
-inline int divUp(int total, int grain)
-{
- return (total + grain - 1) / grain;
-}
-
-///////////////////////////////////////////////////////////////////////////
-//////////////////////////////// ConvertTo ////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
-static void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta)
-{
- string kernelName = "convert_to_S";
- stringstream idxStr;
- idxStr << src.depth();
- kernelName += idxStr.str();
- float alpha_f = (float)alpha, beta_f = (float)beta;
- CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
- vector<pair<size_t , const void *> > args;
- size_t localThreads[3] = {16, 16, 1};
- size_t globalThreads[3];
- globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
- globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
- globalThreads[2] = 1;
- int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
- int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
- if(dst.type() == CV_8UC1)
- {
- globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
- }
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
- args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
- args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
- openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
- localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
-}
-void convertTo( const oclMat &src, oclMat &m, int rtype, double alpha = 1, double beta = 0 );
-void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
-{
- //cout << "cv::ocl::oclMat::convertTo()" << endl;
-
- bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
- && fabs(beta) < std::numeric_limits<double>::epsilon();
-
- if( rtype < 0 )
- rtype = src.type();
- else
- rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());
-
- int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
- if( sdepth == ddepth && noScale )
- {
- src.copyTo(dst);
- return;
- }
-
- oclMat temp;
- const oclMat *psrc = &src;
- if( sdepth != ddepth && psrc == &dst )
- psrc = &(temp = src);
-
- dst.create( src.size(), rtype );
- convert_run_cus(*psrc, dst, alpha, beta);
-}
-
-///////////////////////////////////////////////////////////////////////////
-//////////////////////////////// setTo ////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
-//oclMat &operator = (const Scalar &s)
-//{
-// //cout << "cv::ocl::oclMat::=" << endl;
-// setTo(s);
-// return *this;
-//}
-static void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string kernelName)
-{
- vector<pair<size_t , const void *> > args;
-
- size_t localThreads[3] = {16, 16, 1};
- size_t globalThreads[3];
- globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
- globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
- globalThreads[2] = 1;
- int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
- if(dst.type() == CV_8UC1)
- {
- globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
- }
- char compile_option[32];
- union sc
- {
- cl_uchar4 uval;
- cl_char4 cval;
- cl_ushort4 usval;
- cl_short4 shval;
- cl_int4 ival;
- cl_float4 fval;
- cl_double4 dval;
- } val;
- switch(dst.depth())
- {
- case 0:
- val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
- val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
- val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
- val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
- switch(dst.oclchannels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=uchar");
- args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=uchar4");
- args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
- }
- break;
- case 1:
- val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
- val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
- val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
- val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
- switch(dst.oclchannels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=char");
- args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=char4");
- args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
- }
- break;
- case 2:
- val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
- val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
- val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
- val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
- switch(dst.oclchannels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=ushort");
- args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=ushort4");
- args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
- }
- break;
- case 3:
- val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
- val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
- val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
- val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
- switch(dst.oclchannels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=short");
- args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=short4");
- args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
- }
- break;
- case 4:
- val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
- val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
- val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
- val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
- switch(dst.oclchannels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=int");
- args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
- break;
- case 2:
- sprintf(compile_option, "-D GENTYPE=int2");
- cl_int2 i2val;
- i2val.s[0] = val.ival.s[0];
- i2val.s[1] = val.ival.s[1];
- args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=int4");
- args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
- }
- break;
- case 5:
- val.fval.s[0] = (float)scalar.val[0];
- val.fval.s[1] = (float)scalar.val[1];
- val.fval.s[2] = (float)scalar.val[2];
- val.fval.s[3] = (float)scalar.val[3];
- switch(dst.oclchannels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=float");
- args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=float4");
- args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
- }
- break;
- case 6:
- val.dval.s[0] = scalar.val[0];
- val.dval.s[1] = scalar.val[1];
- val.dval.s[2] = scalar.val[2];
- val.dval.s[3] = scalar.val[3];
- switch(dst.oclchannels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=double");
- args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=double4");
- args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
- }
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat, "unknown depth");
- }
-#ifdef CL_VERSION_1_2
- if(dst.offset == 0 && dst.cols == dst.wholecols)
- {
- clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
- }
- else
- {
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
- openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1, compile_option, CLFLUSH);
- }
-#else
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
- openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1, compile_option, CLFLUSH);
-#endif
-}
-
-static oclMat &setTo(oclMat &src, const Scalar &scalar)
-{
- CV_Assert( src.depth() >= 0 && src.depth() <= 6 );
- CV_DbgAssert( !src.empty());
-
- if(src.type() == CV_8UC1)
- {
- set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask_C1_D0");
- }
- else
- {
- set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask");
- }
-
- return src;
-}
-
-///////////////////////////////////////////////////////////////////////////
-////////////////////////////////// CopyTo /////////////////////////////////
-///////////////////////////////////////////////////////////////////////////
-// static void copy_to_with_mask_cus(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
-// {
-// CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
-// src.rows == dst.rows && src.cols == dst.cols
-// && mask.type() == CV_8UC1);
-
-// vector<pair<size_t , const void *> > args;
-
-// std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
-// {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
-// {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
-// {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
-// };
-// char compile_option[32];
-// sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
-// size_t localThreads[3] = {16, 16, 1};
-// size_t globalThreads[3];
-
-// globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
-// globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
-// globalThreads[2] = 1;
-
-// int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
-// int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
-
-// args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
-// args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
-// args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
-// args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
-
-// openCLExecuteKernel2(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
-// localThreads, args, -1, -1, compile_option, CLFLUSH);
-// }
-
-static void copyTo(const oclMat &src, oclMat &m )
-{
- CV_DbgAssert(!src.empty());
- m.create(src.size(), src.type());
- openCLCopyBuffer2D(src.clCxt, m.data, m.step, m.offset,
- src.data, src.step, src.cols * src.elemSize(), src.rows, src.offset);
-}
-
-// static void copyTo(const oclMat &src, oclMat &mat, const oclMat &mask)
-// {
-// if (mask.empty())
-// {
-// copyTo(src, mat);
-// }
-// else
-// {
-// mat.create(src.size(), src.type());
-// copy_to_with_mask_cus(src, mat, mask, "copy_to_with_mask");
-// }
-// }
-
-static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
+static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
return;
}
- //dst.create(src1.size(), src1.type());
- //CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
- // src1.rows == src2.rows && src2.rows == dst.rows);
CV_Assert(src1.cols == dst.cols &&
src1.rows == dst.rows);
CV_Assert(src1.depth() != CV_8S);
Context *clCxt = src1.clCxt;
- //int channels = dst.channels();
- //int depth = dst.depth();
-
- //int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
- // {4, 0, 4, 4, 1, 1, 1},
- // {4, 0, 4, 4, 1, 1, 1},
- // {4, 0, 4, 4, 1, 1, 1}
- //};
-
- //size_t vector_length = vector_lengths[channels-1][depth];
- //int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
- //int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 16, 16, 1 };
- //size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- // divUp(dst.rows, localThreads[1]) * localThreads[1],
- // 1
- // };
size_t globalThreads[3] = { src1.cols,
src1.rows,
1
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
- //args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
- //args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
- //args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ args.push_back( make_pair( sizeof(float), (float *)&scalar ));
- //if(_scalar != NULL)
- //{
- float scalar1 = *((float *)_scalar);
- args.push_back( make_pair( sizeof(float), (float *)&scalar1 ));
- //}
-
- openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, src1.depth(), CLFLUSH);
-}
-
-static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar)
-{
- arithmetic_run(src1, dst, "arithm_muls", &arithm_mul, (void *)(&scalar));
-}
-
-static void pyrdown_run_cus(const oclMat &src, const oclMat &dst)
-{
-
- CV_Assert(src.type() == dst.type());
- CV_Assert(src.depth() != CV_8S);
-
- Context *clCxt = src.clCxt;
-
- string kernelName = "pyrDown";
-
- size_t localThreads[3] = { 256, 1, 1 };
- size_t globalThreads[3] = { src.cols, dst.rows, 1};
-
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
-
- openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH);
-}
-
-static void pyrDown_cus(const oclMat &src, oclMat &dst)
-{
- CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
-
- dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
-
- pyrdown_run_cus(src, dst);
+ openCLExecuteKernel(clCxt, &arithm_mul, "arithm_muls", globalThreads, localThreads, args, -1, src1.depth());
}
static void lkSparse_run(oclMat &I, oclMat &J,
- const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
- int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
+ const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
+ int level, dim3 patch, Size winSize, int iters)
{
Context *clCxt = I.clCxt;
int elemCntPerRow = I.step / I.elemSize();
args.push_back( make_pair( sizeof(cl_int), (void *)&level ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
- if (!isImageSupported)
+ if (!isImageSupported)
args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x ));
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y ));
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
- if(isImageSupported)
+ bool is_cpu;
+ queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
+ if (is_cpu)
{
- openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
+ openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
releaseTexture(ITex);
releaseTexture(JTex);
}
else
{
- openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
+ if(isImageSupported)
+ {
+ openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
+ releaseTexture(ITex);
+ releaseTexture(JTex);
+ }
+ else
+ {
+ openCLExecuteKernel(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
+ }
}
}
{
nextPts.release();
status.release();
- //if (err) err->release();
+ if (err) err->release();
return;
}
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
oclMat temp2 = nextPts.reshape(1);
- //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
//::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
- //status.setTo(Scalar::all(1));
- setTo(status, Scalar::all(1));
+ status.setTo(Scalar::all(1));
bool errMat = false;
if (!err)
}
else
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
- //ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err);
// build the image pyramids.
if (cn == 1 || cn == 4)
{
- //prevImg.convertTo(prevPyr_[0], CV_32F);
- //nextImg.convertTo(nextPyr_[0], CV_32F);
- convertTo(prevImg, prevPyr_[0], CV_32F);
- convertTo(nextImg, nextPyr_[0], CV_32F);
- }
- else
- {
- //oclMat buf_;
- // cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
- // buf_.convertTo(prevPyr_[0], CV_32F);
-
- // cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
- // buf_.convertTo(nextPyr_[0], CV_32F);
+ prevImg.convertTo(prevPyr_[0], CV_32F);
+ nextImg.convertTo(nextPyr_[0], CV_32F);
}
for (int level = 1; level <= maxLevel; ++level)
{
- pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
- pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
+ pyrDown(prevPyr_[level - 1], prevPyr_[level]);
+ pyrDown(nextPyr_[level - 1], nextPyr_[level]);
}
// dI/dx ~ Ix, dI/dy ~ Iy
{
lkSparse_run(prevPyr_[level], nextPyr_[level],
prevPts, nextPts, status, *err, getMinEigenVals, prevPts.cols,
- level, /*block, */patch, winSize, iters);
+ level, patch, winSize, iters);
}
- clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());
-
if(errMat)
delete err;
}
static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
- oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
+ oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
{
Context *clCxt = I.clCxt;
bool isImageSupported = support_image2d();
JTex = (cl_mem)J.data;
}
- //int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
- //const int patchWidth = 16 + 2 * halfWin.x;
- //const int patchHeight = 16 + 2 * halfWin.y;
- //size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
-
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
if (isImageSupported)
{
- openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
+ openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
releaseTexture(ITex);
releaseTexture(JTex);
}
else
{
- //printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
- openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
+ openCLExecuteKernel(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
}
}
nextPyr_.resize(maxLevel + 1);
prevPyr_[0] = prevImg;
- //nextImg.convertTo(nextPyr_[0], CV_32F);
- convertTo(nextImg, nextPyr_[0], CV_32F);
+ nextImg.convertTo(nextPyr_[0], CV_32F);
for (int level = 1; level <= maxLevel; ++level)
{
- pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
- pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
+ pyrDown(prevPyr_[level - 1], prevPyr_[level]);
+ pyrDown(nextPyr_[level - 1], nextPyr_[level]);
}
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
- //uPyr_[1].setTo(Scalar::all(0));
- //vPyr_[1].setTo(Scalar::all(0));
- setTo(uPyr_[1], Scalar::all(0));
- setTo(vPyr_[1], Scalar::all(0));
+ uPyr_[1].setTo(Scalar::all(0));
+ vPyr_[1].setTo(Scalar::all(0));
Size winSize2i(winSize.width, winSize.height);
idx = idx2;
}
- //uPyr_[idx].copyTo(u);
- //vPyr_[idx].copyTo(v);
- copyTo(uPyr_[idx], u);
- copyTo(vPyr_[idx], v);
-
- clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());
+ uPyr_[idx].copyTo(u);
+ vPyr_[idx].copyTo(v);
}
std::cout << "platform invalid\n";
return -1;
}
- if(pid != 0 || device != 0)
- {
- setDevice(oclinfo[pid], device);
- }
+
+ setDevice(oclinfo[pid], device);
+
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
return RUN_ALL_TESTS();
}
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false)));
+INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
+ Values(false)));
+
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
- Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
- Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
- Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_32SC1, CV_32FC1), Values(false)));
#ifdef HAVE_OPENCL
namespace
{
-
/////////////////////////////////////////////////////////////////////////////////////////////////
// BruteForceMatcher
-
- CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist)
+ CV_ENUM(DistType, BruteForceMatcher_OCL_base::L1Dist,
+ BruteForceMatcher_OCL_base::L2Dist,
+ BruteForceMatcher_OCL_base::HammingDist)
IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
-
- PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize)
+ PARAM_TEST_CASE(BruteForceMatcher, DistType, DescriptorSize)
{
- //std::vector<cv::ocl::Info> oclinfo;
cv::ocl::BruteForceMatcher_OCL_base::DistType distType;
int normCode;
int dim;
virtual void SetUp()
{
- //normCode = GET_PARAM(0);
distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0);
dim = GET_PARAM(1);
- //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- //CV_Assert(devnums > 0);
-
queryDescCount = 300; // must be even number because we split train data in some cases in two
countFactor = 4; // do not change it
TEST_P(BruteForceMatcher, RadiusMatch_Single)
{
- float radius;
- if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist)
- radius = 1.f / countFactor / countFactor;
- else
- radius = 1.f / countFactor;
+ float radius = 1.f / countFactor;
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
- // assume support atomic.
- //if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
- //{
- // try
- // {
- // std::vector< std::vector<cv::DMatch> > matches;
- // matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
- // }
- // catch (const cv::Exception& e)
- // {
- // ASSERT_EQ(CV_StsNotImplemented, e.code);
- // }
- //}
- //else
- {
- std::vector< std::vector<cv::DMatch> > matches;
- matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
+ std::vector< std::vector<cv::DMatch> > matches;
+ matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
- ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
+ ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
- int badCount = 0;
- for (size_t i = 0; i < matches.size(); i++)
+ int badCount = 0;
+ for (size_t i = 0; i < matches.size(); i++)
+ {
+ if ((int)matches[i].size() != 1)
{
- if ((int)matches[i].size() != 1)
- {
+ badCount++;
+ }
+ else
+ {
+ cv::DMatch match = matches[i][0];
+ if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
badCount++;
- }
- else
- {
- cv::DMatch match = matches[i][0];
- if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
- badCount++;
- }
}
-
- ASSERT_EQ(0, badCount);
}
- }
- INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
- //ALL_DEVICES,
- testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)),
- testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
+ ASSERT_EQ(0, badCount);
+ }
+ INSTANTIATE_TEST_CASE_P(OCL_Features2D, BruteForceMatcher,
+ testing::Combine(
+ testing::Values(
+ DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist),
+ DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)/*,
+ DistType(cv::ocl::BruteForceMatcher_OCL_base::HammingDist)*/
+ ),
+ testing::Values(
+ DescriptorSize(57),
+ DescriptorSize(64),
+ DescriptorSize(83),
+ DescriptorSize(128),
+ DescriptorSize(179),
+ DescriptorSize(256),
+ DescriptorSize(304))
+ )
+ );
} // namespace
#endif
void PrintTo(const Inverse &useRoi, std::ostream *os);
-CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
-
-CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
-
enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
+CV_ENUM(CmpCode, CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE)
+CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NORM_MINMAX)
CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
-
-CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
-
-CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
-
-CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
-
-CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
-
-CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
-
-CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
-
-CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
-
-CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT)
+CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV)
+CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC)
+CV_ENUM(Border, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP)
+CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)
+
+CV_FLAGS(GemmFlags, GEMM_1_T, GEMM_2_T, GEMM_3_T);
+CV_FLAGS(WarpFlags, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, WARP_INVERSE_MAP)
+CV_FLAGS(DftFlags, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT)
void run_perf_test();
PERF_TEST_P(InpaintArea_InpaintingMethod, inpaint,
testing::Combine(
testing::Values(::perf::szSmall24, ::perf::szSmall32, ::perf::szSmall64),
- testing::ValuesIn(InpaintingMethod::all())
+ InpaintingMethod::all()
)
)
{
-#/usr/bin/env python
-
from cv2.cv import *
-#/usr/bin/env python
+#!/usr/bin/env python
import sys
from string import Template
-#/usr/bin/env python
+#!/usr/bin/env python
import hdr_parser, sys, re, os, cStringIO
from string import Template
-#/usr/bin/env python
+#!/usr/bin/env python
import os, sys, re, string
-#/usr/bin/env python
+#!/usr/bin/env python
# Calculating and displaying 2D Hue-Saturation histogram of a color image
import sys
-#/usr/bin/env python
+#!/usr/bin/env python
import sys
import math
-#/usr/bin/env python
+#!/usr/bin/env python
import sys
import cv2.cv as cv
-#/usr/bin/env python
+#!/usr/bin/env python
import cv2.cv as cv
import unittest
-#/usr/bin/env python
+#!/usr/bin/env python
import cv2.cv as cv
import numpy as np
-#/usr/bin/env python
+#!/usr/bin/env python
import cv2.cv as cv
import numpy as np
-#/usr/bin/env python
+#!/usr/bin/env python
import cv2.cv as cv
import math
-#/usr/bin/env python
+#!/usr/bin/env python
import cv2.cv as cv
import math
-#/usr/bin/env python
+#!/usr/bin/env python
import cv2.cv as cv
-#/usr/bin/env python
+#!/usr/bin/env python
import unittest
import random
-#/usr/bin/env python
+#!/usr/bin/env python
import unittest
import random
-#/usr/bin/env python
+#!/usr/bin/env python
import urllib
import cv2.cv as cv
-#/usr/bin/env python
+#!/usr/bin/env python
import unittest
import random
-#/usr/bin/env python
+#!/usr/bin/env python
# -*- coding: utf-8 -*-
# transformations.py
endif()
set(the_description "Super Resolution")
-ocv_add_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui)
-ocv_module_include_directories()
-
-ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef /wd4127)
-
-if(HAVE_CUDA)
- ocv_source_group("Src\\Cuda" GLOB "src/cuda/*.cu")
- ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include" ${CUDA_INCLUDE_DIRS})
-
- file(GLOB lib_cuda "src/cuda/*.cu")
- ocv_cuda_compile(cuda_objs ${lib_cuda})
-
- set(cuda_link_libs ${CUDA_LIBRARIES})
-else()
- set(lib_cuda "")
- set(cuda_objs "")
- set(cuda_link_libs "")
-endif()
-
-ocv_glob_module_sources(SOURCES ${lib_cuda} ${cuda_objs})
-
-ocv_create_module(${cuda_link_libs})
-ocv_add_precompiled_headers(${the_module})
-
-ocv_add_accuracy_tests()
-ocv_add_perf_tests()
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
+ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui)
//
//M*/
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPU
+
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
template void calcBtvRegularization<3>(PtrStepSzb src, PtrStepSzb dst, int ksize);
template void calcBtvRegularization<4>(PtrStepSzb src, PtrStepSzb dst, int ksize);
}
+
+#endif /* HAVE_OPENCV_GPU */
namespace perf
{
- CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
- #define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all())
+ #define ALL_BORDER_MODES BorderMode::all()
+ #define ALL_INTERPOLATIONS Interpolation::all()
- CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
- #define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
-
- CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX)
+ CV_ENUM(BorderMode, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP)
+ CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA)
+ CV_ENUM(NormType, NORM_INF, NORM_L1, NORM_L2, NORM_HAMMING, NORM_MINMAX)
enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 };
CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
+
#define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
#define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
// Flags and enums
- CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
+ CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NORM_MINMAX)
- CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
+ CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA)
- CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+ CV_ENUM(BorderType, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP)
#define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP))
- CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
+ CV_FLAGS(WarpFlags, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, WARP_INVERSE_MAP)
//////////////////////////////////////////////////////////////////////
// Features2D
// Low-level types and utilities for porting Google Test to various
// platforms. They are subject to change without notice. DO NOT USE
// THEM IN USER CODE.
+//
+// This file is fundamental to Google Test. All other Google Test source
+// files are expected to #include this. Therefore, it cannot #include
+// any other Google Test header.
#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
namespace internal {
+// A secret type that Google Test users don't know about. It has no
+// definition on purpose. Therefore it's impossible to create a
+// Secret object, which is what we want.
+class Secret;
+
// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time
// expression is true. For example, you could use it to verify the
// size of a static array:
};
#define GTEST_COMPILE_ASSERT_(expr, msg) \
- typedef ::testing::internal::CompileAssert<(bool(expr))> \
- msg[bool(expr) ? 1 : -1]
+ typedef ::testing::internal::CompileAssert<(static_cast<bool>(expr))> \
+ msg[static_cast<bool>(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_
// Implementation details of GTEST_COMPILE_ASSERT_:
//
# include <unistd.h>
#endif // GTEST_OS_LINUX
+#if GTEST_HAS_EXCEPTIONS
+# include <stdexcept>
+#endif
+
#include <ctype.h>
#include <string.h>
#include <iomanip>
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the Message class.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
+// program!
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+
+#include <limits>
+
+
+// Ensures that there is at least one operator<< in the global namespace.
+// See Message& operator<<(...) below for why.
+void operator<<(const testing::internal::Secret&, int);
+
+namespace testing {
+
+// The Message class works like an ostream repeater.
+//
+// Typical usage:
+//
+// 1. You stream a bunch of values to a Message object.
+// It will remember the text in a stringstream.
+// 2. Then you stream the Message object to an ostream.
+// This causes the text in the Message to be streamed
+// to the ostream.
+//
+// For example;
+//
+// testing::Message foo;
+// foo << 1 << " != " << 2;
+// std::cout << foo;
+//
+// will print "1 != 2".
+//
+// Message is not intended to be inherited from. In particular, its
+// destructor is not virtual.
+//
+// Note that stringstream behaves differently in gcc and in MSVC. You
+// can stream a NULL char pointer to it in the former, but not in the
+// latter (it causes an access violation if you do). The Message
+// class hides this difference by treating a NULL char pointer as
+// "(null)".
+class GTEST_API_ Message {
+ private:
+ // The type of basic IO manipulators (endl, ends, and flush) for
+ // narrow streams.
+ typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
+
+ public:
+ // Constructs an empty Message.
+ Message();
+
+ // Copy constructor.
+ Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT
+ *ss_ << msg.GetString();
+ }
+
+ // Constructs a Message from a C-string.
+ explicit Message(const char* str) : ss_(new ::std::stringstream) {
+ *ss_ << str;
+ }
+
+#if GTEST_OS_SYMBIAN
+ // Streams a value (either a pointer or not) to this object.
+ template <typename T>
+ inline Message& operator <<(const T& value) {
+ StreamHelper(typename internal::is_pointer<T>::type(), value);
+ return *this;
+ }
+#else
+ // Streams a non-pointer value to this object.
+ template <typename T>
+ inline Message& operator <<(const T& val) {
+ // Some libraries overload << for STL containers. These
+ // overloads are defined in the global namespace instead of ::std.
+ //
+ // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
+ // overloads are visible in either the std namespace or the global
+ // namespace, but not other namespaces, including the testing
+ // namespace which Google Test's Message class is in.
+ //
+ // To allow STL containers (and other types that has a << operator
+ // defined in the global namespace) to be used in Google Test
+ // assertions, testing::Message must access the custom << operator
+ // from the global namespace. With this using declaration,
+ // overloads of << defined in the global namespace and those
+ // visible via Koenig lookup are both exposed in this function.
+ using ::operator <<;
+ *ss_ << val;
+ return *this;
+ }
+
+ // Streams a pointer value to this object.
+ //
+ // This function is an overload of the previous one. When you
+ // stream a pointer to a Message, this definition will be used as it
+ // is more specialized. (The C++ Standard, section
+ // [temp.func.order].) If you stream a non-pointer, then the
+ // previous definition will be used.
+ //
+ // The reason for this overload is that streaming a NULL pointer to
+ // ostream is undefined behavior. Depending on the compiler, you
+ // may get "0", "(nil)", "(null)", or an access violation. To
+ // ensure consistent result across compilers, we always treat NULL
+ // as "(null)".
+ template <typename T>
+ inline Message& operator <<(T* const& pointer) { // NOLINT
+ if (pointer == NULL) {
+ *ss_ << "(null)";
+ } else {
+ *ss_ << pointer;
+ }
+ return *this;
+ }
+#endif // GTEST_OS_SYMBIAN
+
+ // Since the basic IO manipulators are overloaded for both narrow
+ // and wide streams, we have to provide this specialized definition
+ // of operator <<, even though its body is the same as the
+ // templatized version above. Without this definition, streaming
+ // endl or other basic IO manipulators to Message will confuse the
+ // compiler.
+ Message& operator <<(BasicNarrowIoManip val) {
+ *ss_ << val;
+ return *this;
+ }
+
+ // Instead of 1/0, we want to see true/false for bool values.
+ Message& operator <<(bool b) {
+ return *this << (b ? "true" : "false");
+ }
+
+ // These two overloads allow streaming a wide C string to a Message
+ // using the UTF-8 encoding.
+ Message& operator <<(const wchar_t* wide_c_str);
+ Message& operator <<(wchar_t* wide_c_str);
+
+#if GTEST_HAS_STD_WSTRING
+ // Converts the given wide string to a narrow string using the UTF-8
+ // encoding, and streams the result to this Message object.
+ Message& operator <<(const ::std::wstring& wstr);
+#endif // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_GLOBAL_WSTRING
+ // Converts the given wide string to a narrow string using the UTF-8
+ // encoding, and streams the result to this Message object.
+ Message& operator <<(const ::wstring& wstr);
+#endif // GTEST_HAS_GLOBAL_WSTRING
+
+ // Gets the text streamed to this object so far as an std::string.
+ // Each '\0' character in the buffer is replaced with "\\0".
+ //
+ // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ std::string GetString() const;
+
+ private:
+
+#if GTEST_OS_SYMBIAN
+ // These are needed as the Nokia Symbian Compiler cannot decide between
+ // const T& and const T* in a function template. The Nokia compiler _can_
+ // decide between class template specializations for T and T*, so a
+ // tr1::type_traits-like is_pointer works, and we can overload on that.
+ template <typename T>
+ inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) {
+ if (pointer == NULL) {
+ *ss_ << "(null)";
+ } else {
+ *ss_ << pointer;
+ }
+ }
+ template <typename T>
+ inline void StreamHelper(internal::false_type /*is_pointer*/,
+ const T& value) {
+ // See the comments in Message& operator <<(const T&) above for why
+ // we need this using statement.
+ using ::operator <<;
+ *ss_ << value;
+ }
+#endif // GTEST_OS_SYMBIAN
+
+ // We'll hold the text streamed to this object here.
+ const internal::scoped_ptr< ::std::stringstream> ss_;
+
+ // We declare (but don't implement) this to prevent the compiler
+ // from implementing the assignment operator.
+ void operator=(const Message&);
+};
+
+// Streams a Message to an ostream.
+inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
+ return os << sb.GetString();
+}
+
+namespace internal {
+
+// Converts a streamable value to an std::string. A NULL pointer is
+// converted to "(null)". When the input value is a ::string,
+// ::std::string, ::wstring, or ::std::wstring object, each NUL
+// character in it is replaced with "\\0".
+template <typename T>
+std::string StreamableToString(const T& streamable) {
+ return (Message() << streamable).GetString();
+}
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
static bool EndsWithCaseInsensitive(
const std::string& str, const std::string& suffix);
- // Formats a list of arguments to an std::string, using the same format
- // spec string as for printf.
- //
- // We do not use the StringPrintf class as it is not universally
- // available.
- //
- // The result is limited to 4096 characters (including the tailing
- // 0). If 4096 characters are not enough to format the input,
- // "<buffer exceeded>" is returned.
- static std::string Format(const char* format, ...);
+ // Formats an int value as "%02d".
+ static std::string FormatIntWidth2(int value); // "%02d" for width == 2
+
+ // Formats an int value as "%X".
+ static std::string FormatHexInt(int value);
+
+ // Formats a byte as "%02X".
+ static std::string FormatByte(unsigned char value);
private:
String(); // Not meant to be instantiated.
// character in the buffer is replaced with "\\0".
GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
-// Converts a streamable value to an std::string. A NULL pointer is
-// converted to "(null)". When the input value is a ::string,
-// ::std::string, ::wstring, or ::std::wstring object, each NUL
-// character in it is replaced with "\\0".
-
-// Declared here but defined in gtest.h, so that it has access
-// to the definition of the Message class, required by the ARM
-// compiler.
-template <typename T>
-std::string StreamableToString(const T& streamable);
-
} // namespace internal
} // namespace testing
#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar
-// Google Test defines the testing::Message class to allow construction of
-// test messages via the << operator. The idea is that anything
-// streamable to std::ostream can be streamed to a testing::Message.
-// This allows a user to use his own types in Google Test assertions by
-// overloading the << operator.
-//
-// util/gtl/stl_logging.h overloads << for STL containers. These
-// overloads cannot be defined in the std namespace, as that will be
-// undefined behavior. Therefore, they are defined in the global
-// namespace instead.
-//
-// C++'s symbol lookup rule (i.e. Koenig lookup) says that these
-// overloads are visible in either the std namespace or the global
-// namespace, but not other namespaces, including the testing
-// namespace which Google Test's Message class is in.
-//
-// To allow STL containers (and other types that has a << operator
-// defined in the global namespace) to be used in Google Test assertions,
-// testing::Message must access the custom << operator from the global
-// namespace. Hence this helper function.
-//
-// Note: Jeffrey Yasskin suggested an alternative fix by "using
-// ::operator<<;" in the definition of Message's operator<<. That fix
-// doesn't require a helper function, but unfortunately doesn't
-// compile with MSVC.
-template <typename T>
-inline void GTestStreamToHelper(std::ostream* os, const T& val) {
- *os << val;
-}
-
class ProtocolMessage;
namespace proto2 { class Message; }
// stack trace.
GTEST_API_ extern const char kStackTraceMarker[];
-// A secret type that Google Test users don't know about. It has no
-// definition on purpose. Therefore it's impossible to create a
-// Secret object, which is what we want.
-class Secret;
-
// Two overloaded helpers for checking at compile time whether an
// expression is a null pointer literal (i.e. NULL or any 0-valued
// compile-time integral constant). Their return values have
GTEST_API_ std::string AppendUserMessage(
const std::string& gtest_msg, const Message& user_msg);
+#if GTEST_HAS_EXCEPTIONS
+
+// This exception is thrown by (and only by) a failed Google Test
+// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
+// are enabled). We derive it from std::runtime_error, which is for
+// errors presumably detectable only at run time. Since
+// std::runtime_error inherits from std::exception, many testing
+// frameworks know how to extract and print the message inside it.
+
+#ifdef _MSC_VER
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4275) // Temporarily disables warning 4275.
+#endif // _MSC_VER
+
+class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
+ public:
+ explicit GoogleTestFailureException(const TestPartResult& failure);
+};
+
+#ifdef _MSC_VER
+# pragma warning(pop) // Restores the warning state.
+#endif // _MSC_VER
+
+#endif // GTEST_HAS_EXCEPTIONS
+
// A helper class for creating scoped traces in user programs.
class GTEST_API_ ScopedTrace {
public:
// c'tor and d'tor. Therefore it doesn't
// need to be used otherwise.
-// Converts a streamable value to an std::string. A NULL pointer is
-// converted to "(null)". When the input value is a ::string,
-// ::std::string, ::wstring, or ::std::wstring object, each NUL
-// character in it is replaced with "\\0".
-// Declared here but defined in gtest.h, so that it has access
-// to the definition of the Message class, required by the ARM
-// compiler.
-template <typename T>
-std::string StreamableToString(const T& streamable);
-
// Constructs and returns the message for an equality assertion
// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
//
// bits. Therefore, 4 should be enough for ordinary use.
//
// See the following article for more details on ULP:
- // http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm.
+ // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
static const size_t kMaxUlps = 4;
// Constructs a FloatingPoint from a raw floating-point number.
// test_case_name: name of the test case
// name: name of the test
// type_param the name of the test's type parameter, or NULL if
-// this is not a typed or a type-parameterized test.
+// this is not a typed or a type-parameterized test.
// value_param text representation of the test's value parameter,
// or NULL if this is not a type-parameterized test.
// fixture_class_id: ID of the test fixture class
// The newly created TestInfo instance will assume
// ownership of the factory object.
GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
- const char* test_case_name, const char* name,
+ const char* test_case_name,
+ const char* name,
const char* type_param,
const char* value_param,
TypeId fixture_class_id,
// First, registers the first type-parameterized test in the type
// list.
MakeAndRegisterTestInfo(
- String::Format("%s%s%s/%d", prefix, prefix[0] == '\0' ? "" : "/",
- case_name, index).c_str(),
+ (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + "/"
+ + StreamableToString(index)).c_str(),
GetPrefixUntilComma(test_names).c_str(),
GetTypeName<Type>().c_str(),
NULL, // No value parameter.
} // namespace testing
#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the Message class.
-//
-// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
-// leave some internal implementation details in this header file.
-// They are clearly marked by comments like this:
-//
-// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-//
-// Such code is NOT meant to be used by a user directly, and is subject
-// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
-// program!
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
-#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
-
-#include <limits>
-
-
-namespace testing {
-
-// The Message class works like an ostream repeater.
-//
-// Typical usage:
-//
-// 1. You stream a bunch of values to a Message object.
-// It will remember the text in a stringstream.
-// 2. Then you stream the Message object to an ostream.
-// This causes the text in the Message to be streamed
-// to the ostream.
-//
-// For example;
-//
-// testing::Message foo;
-// foo << 1 << " != " << 2;
-// std::cout << foo;
-//
-// will print "1 != 2".
-//
-// Message is not intended to be inherited from. In particular, its
-// destructor is not virtual.
-//
-// Note that stringstream behaves differently in gcc and in MSVC. You
-// can stream a NULL char pointer to it in the former, but not in the
-// latter (it causes an access violation if you do). The Message
-// class hides this difference by treating a NULL char pointer as
-// "(null)".
-class GTEST_API_ Message {
- private:
- // The type of basic IO manipulators (endl, ends, and flush) for
- // narrow streams.
- typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
-
- public:
- // Constructs an empty Message.
- // We allocate the stringstream separately because otherwise each use of
- // ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
- // stack frame leading to huge stack frames in some cases; gcc does not reuse
- // the stack space.
- Message() : ss_(new ::std::stringstream) {
- // By default, we want there to be enough precision when printing
- // a double to a Message.
- *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2);
- }
-
- // Copy constructor.
- Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT
- *ss_ << msg.GetString();
- }
-
- // Constructs a Message from a C-string.
- explicit Message(const char* str) : ss_(new ::std::stringstream) {
- *ss_ << str;
- }
-
-#if GTEST_OS_SYMBIAN
- // Streams a value (either a pointer or not) to this object.
- template <typename T>
- inline Message& operator <<(const T& value) {
- StreamHelper(typename internal::is_pointer<T>::type(), value);
- return *this;
- }
-#else
- // Streams a non-pointer value to this object.
- template <typename T>
- inline Message& operator <<(const T& val) {
- ::GTestStreamToHelper(ss_.get(), val);
- return *this;
- }
-
- // Streams a pointer value to this object.
- //
- // This function is an overload of the previous one. When you
- // stream a pointer to a Message, this definition will be used as it
- // is more specialized. (The C++ Standard, section
- // [temp.func.order].) If you stream a non-pointer, then the
- // previous definition will be used.
- //
- // The reason for this overload is that streaming a NULL pointer to
- // ostream is undefined behavior. Depending on the compiler, you
- // may get "0", "(nil)", "(null)", or an access violation. To
- // ensure consistent result across compilers, we always treat NULL
- // as "(null)".
- template <typename T>
- inline Message& operator <<(T* const& pointer) { // NOLINT
- if (pointer == NULL) {
- *ss_ << "(null)";
- } else {
- ::GTestStreamToHelper(ss_.get(), pointer);
- }
- return *this;
- }
-#endif // GTEST_OS_SYMBIAN
-
- // Since the basic IO manipulators are overloaded for both narrow
- // and wide streams, we have to provide this specialized definition
- // of operator <<, even though its body is the same as the
- // templatized version above. Without this definition, streaming
- // endl or other basic IO manipulators to Message will confuse the
- // compiler.
- Message& operator <<(BasicNarrowIoManip val) {
- *ss_ << val;
- return *this;
- }
-
- // Instead of 1/0, we want to see true/false for bool values.
- Message& operator <<(bool b) {
- return *this << (b ? "true" : "false");
- }
-
- // These two overloads allow streaming a wide C string to a Message
- // using the UTF-8 encoding.
- Message& operator <<(const wchar_t* wide_c_str) {
- return *this << internal::String::ShowWideCString(wide_c_str);
- }
- Message& operator <<(wchar_t* wide_c_str) {
- return *this << internal::String::ShowWideCString(wide_c_str);
- }
-
-#if GTEST_HAS_STD_WSTRING
- // Converts the given wide string to a narrow string using the UTF-8
- // encoding, and streams the result to this Message object.
- Message& operator <<(const ::std::wstring& wstr);
-#endif // GTEST_HAS_STD_WSTRING
-
-#if GTEST_HAS_GLOBAL_WSTRING
- // Converts the given wide string to a narrow string using the UTF-8
- // encoding, and streams the result to this Message object.
- Message& operator <<(const ::wstring& wstr);
-#endif // GTEST_HAS_GLOBAL_WSTRING
-
- // Gets the text streamed to this object so far as an std::string.
- // Each '\0' character in the buffer is replaced with "\\0".
- //
- // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
- std::string GetString() const {
- return internal::StringStreamToString(ss_.get());
- }
-
- private:
-
-#if GTEST_OS_SYMBIAN
- // These are needed as the Nokia Symbian Compiler cannot decide between
- // const T& and const T* in a function template. The Nokia compiler _can_
- // decide between class template specializations for T and T*, so a
- // tr1::type_traits-like is_pointer works, and we can overload on that.
- template <typename T>
- inline void StreamHelper(internal::true_type /*dummy*/, T* pointer) {
- if (pointer == NULL) {
- *ss_ << "(null)";
- } else {
- ::GTestStreamToHelper(ss_.get(), pointer);
- }
- }
- template <typename T>
- inline void StreamHelper(internal::false_type /*dummy*/, const T& value) {
- ::GTestStreamToHelper(ss_.get(), value);
- }
-#endif // GTEST_OS_SYMBIAN
-
- // We'll hold the text streamed to this object here.
- const internal::scoped_ptr< ::std::stringstream> ss_;
-
- // We declare (but don't implement) this to prevent the compiler
- // from implementing the assignment operator.
- void operator=(const Message&);
-};
-
-// Streams a Message to an ostream.
-inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
- return os << sb.GetString();
-}
-
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
// This file was GENERATED by command:
// pump.py gtest-param-test.h.pump
// DO NOT EDIT BY HAND!!!
const string& instantiation_name = gen_it->first;
ParamGenerator<ParamType> generator((*gen_it->second)());
- Message test_case_name_stream;
+ string test_case_name;
if ( !instantiation_name.empty() )
- test_case_name_stream << instantiation_name << "/";
- test_case_name_stream << test_info->test_case_base_name;
+ test_case_name = instantiation_name + "/";
+ test_case_name += test_info->test_case_base_name;
int i = 0;
for (typename ParamGenerator<ParamType>::iterator param_it =
Message test_name_stream;
test_name_stream << test_info->test_base_name << "/" << i;
MakeAndRegisterTestInfo(
- test_case_name_stream.GetString().c_str(),
+ test_case_name.c_str(),
test_name_stream.GetString().c_str(),
NULL, // No type parameter.
PrintToString(*param_it).c_str(),
class NoExecDeathTest;
class FinalSuccessChecker;
class GTestFlagSaver;
+class StreamingListenerTest;
class TestResultAccessor;
class TestEventListenersAccessor;
class TestEventRepeater;
+class UnitTestRecordPropertyTestHelper;
class WindowsDeathTest;
class UnitTestImpl* GetUnitTestImpl();
void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
const std::string& message);
-// Converts a streamable value to an std::string. A NULL pointer is
-// converted to "(null)". When the input value is a ::string,
-// ::std::string, ::wstring, or ::std::wstring object, each NUL
-// character in it is replaced with "\\0".
-// Declared in gtest-internal.h but defined here, so that it has access
-// to the definition of the Message class, required by the ARM
-// compiler.
-template <typename T>
-std::string StreamableToString(const T& streamable) {
- return (Message() << streamable).GetString();
-}
-
} // namespace internal
// The friend relationship of some of these classes is cyclic.
// non-fatal) failure.
static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
- // Logs a property for the current test. Only the last value for a given
- // key is remembered.
- // These are public static so they can be called from utility functions
- // that are not members of the test fixture.
- // The arguments are const char* instead strings, as Google Test is used
- // on platforms where string doesn't compile.
- //
- // Note that a driving consideration for these RecordProperty methods
- // was to produce xml output suited to the Greenspan charting utility,
- // which at present will only chart values that fit in a 32-bit int. It
- // is the user's responsibility to restrict their values to 32-bit ints
- // if they intend them to be used with Greenspan.
- static void RecordProperty(const char* key, const char* value);
- static void RecordProperty(const char* key, int value);
+ // Logs a property for the current test, test case, or for the entire
+ // invocation of the test program when used outside of the context of a
+ // test case. Only the last value for a given key is remembered. These
+ // are public static so they can be called from utility functions that are
+ // not members of the test fixture. Calls to RecordProperty made during
+ // lifespan of the test (from the moment its constructor starts to the
+ // moment its destructor finishes) will be output in XML as attributes of
+ // the <testcase> element. Properties recorded from fixture's
+ // SetUpTestCase or TearDownTestCase are logged as attributes of the
+ // corresponding <testsuite> element. Calls to RecordProperty made in the
+ // global context (before or after invocation of RUN_ALL_TESTS and from
+ // SetUp/TearDown method of Environment objects registered with Google
+ // Test) will be output as attributes of the <testsuites> element.
+ static void RecordProperty(const std::string& key, const std::string& value);
+ static void RecordProperty(const std::string& key, int value);
protected:
// Creates a Test object.
// C'tor. TestProperty does NOT have a default constructor.
// Always use this constructor (with parameters) to create a
// TestProperty object.
- TestProperty(const char* a_key, const char* a_value) :
+ TestProperty(const std::string& a_key, const std::string& a_value) :
key_(a_key), value_(a_value) {
}
}
// Sets a new value, overriding the one supplied in the constructor.
- void SetValue(const char* new_value) {
+ void SetValue(const std::string& new_value) {
value_ = new_value;
}
private:
friend class TestInfo;
+ friend class TestCase;
friend class UnitTest;
friend class internal::DefaultGlobalTestPartResultReporter;
friend class internal::ExecDeathTest;
// a non-fatal failure if invalid (e.g., if it conflicts with reserved
// key names). If a property is already recorded for the same key, the
// value will be updated, rather than storing multiple values for the same
- // key.
- void RecordProperty(const TestProperty& test_property);
+ // key. xml_element specifies the element for which the property is being
+ // recorded and is used for validation.
+ void RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property);
// Adds a failure if the key is a reserved attribute of Google Test
// testcase tags. Returns true if the property is valid.
// TODO(russr): Validate attribute names are legal and human readable.
- static bool ValidateTestProperty(const TestProperty& test_property);
+ static bool ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property);
// Adds a test part result to the list.
void AddTestPartResult(const TestPartResult& test_part_result);
friend class Test;
friend class TestCase;
friend class internal::UnitTestImpl;
+ friend class internal::StreamingListenerTest;
friend TestInfo* internal::MakeAndRegisterTestInfo(
- const char* test_case_name, const char* name,
+ const char* test_case_name,
+ const char* name,
const char* type_param,
const char* value_param,
internal::TypeId fixture_class_id,
// Constructs a TestInfo object. The newly constructed instance assumes
// ownership of the factory object.
- TestInfo(const char* test_case_name, const char* name,
- const char* a_type_param,
- const char* a_value_param,
+ TestInfo(const std::string& test_case_name,
+ const std::string& name,
+ const char* a_type_param, // NULL if not a type-parameterized test
+ const char* a_value_param, // NULL if not a value-parameterized test
internal::TypeId fixture_class_id,
internal::TestFactoryBase* factory);
// total_test_count() - 1. If i is not in that range, returns NULL.
const TestInfo* GetTestInfo(int i) const;
+ // Returns the TestResult that holds test properties recorded during
+ // execution of SetUpTestCase and TearDownTestCase.
+ const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
+
private:
friend class Test;
friend class internal::UnitTestImpl;
bool should_run_;
// Elapsed time, in milliseconds.
TimeInMillis elapsed_time_;
+ // Holds test properties recorded during execution of SetUpTestCase and
+ // TearDownTestCase.
+ TestResult ad_hoc_test_result_;
// We disallow copying TestCases.
GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase);
// total_test_case_count() - 1. If i is not in that range, returns NULL.
const TestCase* GetTestCase(int i) const;
+ // Returns the TestResult containing information on test failures and
+ // properties logged outside of individual test cases.
+ const TestResult& ad_hoc_test_result() const;
+
// Returns the list of event listeners that can be used to track events
// inside Google Test.
TestEventListeners& listeners();
const std::string& os_stack_trace)
GTEST_LOCK_EXCLUDED_(mutex_);
- // Adds a TestProperty to the current TestResult object. If the result already
- // contains a property with the same key, the value will be updated.
- void RecordPropertyForCurrentTest(const char* key, const char* value);
+ // Adds a TestProperty to the current TestResult object when invoked from
+ // inside a test, to current TestCase's ad_hoc_test_result_ when invoked
+ // from SetUpTestCase or TearDownTestCase, or to the global property set
+ // when invoked elsewhere. If the result already contains a property with
+ // the same key, the value will be updated.
+ void RecordProperty(const std::string& key, const std::string& value);
// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
friend class Test;
friend class internal::AssertHelper;
friend class internal::ScopedTrace;
+ friend class internal::StreamingListenerTest;
+ friend class internal::UnitTestRecordPropertyTestHelper;
friend Environment* AddGlobalTestEnvironment(Environment* env);
friend internal::UnitTestImpl* internal::GetUnitTestImpl();
friend void internal::ReportFailureInUnknownLocation(
// references static data, to reduce the opportunity for incorrect uses
// like writing 'WithParamInterface<bool>::GetParam()' for a test that
// uses a fixture whose parameter type is int.
- const ParamType& GetParam() const { return *parameter_; }
+ const ParamType& GetParam() const {
+ GTEST_CHECK_(parameter_ != NULL)
+ << "GetParam() can only be called inside a value-parameterized test "
+ << "-- did you intend to write TEST_P instead of TEST_F?";
+ return *parameter_;
+ }
private:
// Sets parameter value. The caller is responsible for making sure the value
GTEST_TEST_(test_fixture, test_name, test_fixture, \
::testing::internal::GetTypeId<test_fixture>())
-// Use this macro in main() to run all tests. It returns 0 if all
+} // namespace testing
+
+// Use this function in main() to run all tests. It returns 0 if all
// tests are successful, or 1 otherwise.
//
// RUN_ALL_TESTS() should be invoked after the command line has been
// parsed by InitGoogleTest().
+//
+// This function was formerly a macro; thus, it is in the global
+// namespace and has an all-caps name.
+int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;
-#define RUN_ALL_TESTS()\
- (::testing::UnitTest::GetInstance()->Run())
-
-} // namespace testing
+inline int RUN_ALL_TESTS() {
+ return ::testing::UnitTest::GetInstance()->Run();
+}
#endif // GTEST_INCLUDE_GTEST_GTEST_H_
#define __OPENCV_TS_PERF_HPP__
#include "opencv2/core/core.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "ts_gtest.h"
# endif
#endif
+// declare major namespaces to avoid errors on unknown namespaces
+namespace cv { namespace gpu {} namespace ocl {} }
+
namespace perf
{
class TestBase;
* CV_ENUM and CV_FLAGS - macro to create printable wrappers for defines and enums *
\*****************************************************************************************/
-#define CV_ENUM(class_name, ...) \
-namespace { class CV_EXPORTS class_name {\
-public:\
- class_name(int val = 0) : _val(val) {}\
- operator int() const {return _val;}\
- void PrintTo(std::ostream* os) const {\
- const int vals[] = {__VA_ARGS__};\
- const char* svals = #__VA_ARGS__;\
- for(int i = 0, pos = 0; i < (int)(sizeof(vals)/sizeof(int)); ++i){\
- while(isspace(svals[pos]) || svals[pos] == ',') ++pos;\
- int start = pos;\
- while(!(isspace(svals[pos]) || svals[pos] == ',' || svals[pos] == 0)) ++pos;\
- if (_val == vals[i]) {\
- *os << std::string(svals + start, svals + pos);\
- return;\
- }\
- }\
- *os << "UNKNOWN";\
- }\
- struct Container{\
- typedef class_name value_type;\
- Container(class_name* first, size_t len): _begin(first), _end(first+len){}\
- const class_name* begin() const {return _begin;}\
- const class_name* end() const {return _end;}\
- private: class_name *_begin, *_end;\
- };\
- static Container all(){\
- static int vals[] = {__VA_ARGS__};\
- return Container((class_name*)vals, sizeof(vals)/sizeof(vals[0]));\
- }\
-private: int _val;\
-};\
-inline void PrintTo(const class_name& t, std::ostream* os) { t.PrintTo(os); } }
-
-#define CV_FLAGS(class_name, ...) \
-class CV_EXPORTS class_name {\
-public:\
- class_name(int val = 0) : _val(val) {}\
- operator int() const {return _val;}\
- void PrintTo(std::ostream* os) const {\
- const int vals[] = {__VA_ARGS__};\
- const char* svals = #__VA_ARGS__;\
- int value = _val;\
- bool first = true;\
- for(int i = 0, pos = 0; i < (int)(sizeof(vals)/sizeof(int)); ++i){\
- while(isspace(svals[pos]) || svals[pos] == ',') ++pos;\
- int start = pos;\
- while(!(isspace(svals[pos]) || svals[pos] == ',' || svals[pos] == 0)) ++pos;\
- if ((value & vals[i]) == vals[i]) {\
- value &= ~vals[i]; \
- if (first) first = false; else *os << "|"; \
- *os << std::string(svals + start, svals + pos);\
- if (!value) return;\
- }\
- }\
- if (first) *os << "UNKNOWN";\
- }\
-private: int _val;\
-};\
-inline void PrintTo(const class_name& t, std::ostream* os) { t.PrintTo(os); }
+#define CV_ENUM(class_name, ...) \
+ namespace { \
+ struct class_name { \
+ class_name(int val = 0) : val_(val) {} \
+ operator int() const { return val_; } \
+ void PrintTo(std::ostream* os) const { \
+ using namespace cv;using namespace cv::gpu; using namespace cv::ocl; \
+ const int vals[] = { __VA_ARGS__ }; \
+ const char* svals = #__VA_ARGS__; \
+ for(int i = 0, pos = 0; i < (int)(sizeof(vals)/sizeof(int)); ++i) { \
+ while(isspace(svals[pos]) || svals[pos] == ',') ++pos; \
+ int start = pos; \
+ while(!(isspace(svals[pos]) || svals[pos] == ',' || svals[pos] == 0)) \
+ ++pos; \
+ if (val_ == vals[i]) { \
+ *os << std::string(svals + start, svals + pos); \
+ return; \
+ } \
+ } \
+ *os << "UNKNOWN"; \
+ } \
+ static ::testing::internal::ParamGenerator<class_name> all() { \
+ using namespace cv;using namespace cv::gpu; using namespace cv::ocl; \
+ static class_name vals[] = { __VA_ARGS__ }; \
+ return ::testing::ValuesIn(vals); \
+ } \
+ private: int val_; \
+ }; \
+ inline void PrintTo(const class_name& t, std::ostream* os) { t.PrintTo(os); } }
+
+#define CV_FLAGS(class_name, ...) \
+ namespace { \
+ struct class_name { \
+ class_name(int val = 0) : val_(val) {} \
+ operator int() const { return val_; } \
+ void PrintTo(std::ostream* os) const { \
+ using namespace cv;using namespace cv::gpu; using namespace cv::ocl; \
+ const int vals[] = { __VA_ARGS__ }; \
+ const char* svals = #__VA_ARGS__; \
+ int value = val_; \
+ bool first = true; \
+ for(int i = 0, pos = 0; i < (int)(sizeof(vals)/sizeof(int)); ++i) { \
+ while(isspace(svals[pos]) || svals[pos] == ',') ++pos; \
+ int start = pos; \
+ while(!(isspace(svals[pos]) || svals[pos] == ',' || svals[pos] == 0)) \
+ ++pos; \
+ if ((value & vals[i]) == vals[i]) { \
+ value &= ~vals[i]; \
+ if (first) first = false; else *os << "|"; \
+ *os << std::string(svals + start, svals + pos); \
+ if (!value) return; \
+ } \
+ } \
+ if (first) *os << "UNKNOWN"; \
+ } \
+ private: int val_; \
+ }; \
+ inline void PrintTo(const class_name& t, std::ostream* os) { t.PrintTo(os); } }
CV_ENUM(MatDepth, CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_USRTYPE1)
-#/usr/bin/env python
+#!/usr/bin/env python
import sys, re, os.path
from xml.dom.minidom import parse
#include <wctype.h>
#include <algorithm>
+#include <iomanip>
+#include <limits>
#include <ostream> // NOLINT
#include <sstream>
#include <vector>
#include <vector>
+#if GTEST_CAN_STREAM_RESULTS_
+# include <arpa/inet.h> // NOLINT
+# include <netdb.h> // NOLINT
+#endif
+
#if GTEST_OS_WINDOWS
# include <windows.h> // NOLINT
#endif // GTEST_OS_WINDOWS
bool list_tests_;
std::string output_;
bool print_time_;
- bool pretty_;
internal::Int32 random_seed_;
internal::Int32 repeat_;
bool shuffle_;
// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
-// The output buffer str must containt at least 32 characters.
-// The function returns the address of the output buffer.
// If the code_point is not a valid Unicode code point
-// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output
-// as '(Invalid Unicode 0xXXXXXXXX)'.
-GTEST_API_ char* CodePointToUtf8(UInt32 code_point, char* str);
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+GTEST_API_ std::string CodePointToUtf8(UInt32 code_point);
// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
// Constructor.
//
// TestPropertyKeyIs has NO default constructor.
- explicit TestPropertyKeyIs(const char* key)
- : key_(key) {}
+ explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}
// Returns true iff the test name of test property matches on key_.
bool operator()(const TestProperty& test_property) const {
ad_hoc_test_result_.Clear();
}
+ // Adds a TestProperty to the current TestResult object when invoked in a
+ // context of a test or a test case, or to the global property set. If the
+ // result already contains a property with the same key, the value will be
+ // updated.
+ void RecordProperty(const TestProperty& test_property);
+
enum ReactionToSharding {
HONOR_SHARDING_PROTOCOL,
IGNORE_SHARDING_PROTOCOL
class TestResultAccessor {
public:
static void RecordProperty(TestResult* test_result,
+ const std::string& xml_element,
const TestProperty& property) {
- test_result->RecordProperty(property);
+ test_result->RecordProperty(xml_element, property);
}
static void ClearTestPartResults(TestResult* test_result) {
}
};
+#if GTEST_CAN_STREAM_RESULTS_
+
+// Streams test results to the given port on the given host machine.
+class StreamingListener : public EmptyTestEventListener {
+ public:
+ // Abstract base class for writing strings to a socket.
+ class AbstractSocketWriter {
+ public:
+ virtual ~AbstractSocketWriter() {}
+
+ // Sends a string to the socket.
+ virtual void Send(const string& message) = 0;
+
+ // Closes the socket.
+ virtual void CloseConnection() {}
+
+ // Sends a string and a newline to the socket.
+ void SendLn(const string& message) {
+ Send(message + "\n");
+ }
+ };
+
+ // Concrete class for actually writing strings to a socket.
+ class SocketWriter : public AbstractSocketWriter {
+ public:
+ SocketWriter(const string& host, const string& port)
+ : sockfd_(-1), host_name_(host), port_num_(port) {
+ MakeConnection();
+ }
+
+ virtual ~SocketWriter() {
+ if (sockfd_ != -1)
+ CloseConnection();
+ }
+
+ // Sends a string to the socket.
+ virtual void Send(const string& message) {
+ GTEST_CHECK_(sockfd_ != -1)
+ << "Send() can be called only when there is a connection.";
+
+ const int len = static_cast<int>(message.length());
+ if (write(sockfd_, message.c_str(), len) != len) {
+ GTEST_LOG_(WARNING)
+ << "stream_result_to: failed to stream to "
+ << host_name_ << ":" << port_num_;
+ }
+ }
+
+ private:
+ // Creates a client socket and connects to the server.
+ void MakeConnection();
+
+ // Closes the socket.
+ void CloseConnection() {
+ GTEST_CHECK_(sockfd_ != -1)
+ << "CloseConnection() can be called only when there is a connection.";
+
+ close(sockfd_);
+ sockfd_ = -1;
+ }
+
+ int sockfd_; // socket file descriptor
+ const string host_name_;
+ const string port_num_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter);
+ }; // class SocketWriter
+
+ // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
+ static string UrlEncode(const char* str);
+
+ StreamingListener(const string& host, const string& port)
+ : socket_writer_(new SocketWriter(host, port)) { Start(); }
+
+ explicit StreamingListener(AbstractSocketWriter* socket_writer)
+ : socket_writer_(socket_writer) { Start(); }
+
+ void OnTestProgramStart(const UnitTest& /* unit_test */) {
+ SendLn("event=TestProgramStart");
+ }
+
+ void OnTestProgramEnd(const UnitTest& unit_test) {
+    // Note that Google Test currently only reports elapsed time for each
+ // test iteration, not for the entire test program.
+ SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));
+
+ // Notify the streaming server to stop.
+ socket_writer_->CloseConnection();
+ }
+
+ void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
+ SendLn("event=TestIterationStart&iteration=" +
+ StreamableToString(iteration));
+ }
+
+ void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
+ SendLn("event=TestIterationEnd&passed=" +
+ FormatBool(unit_test.Passed()) + "&elapsed_time=" +
+ StreamableToString(unit_test.elapsed_time()) + "ms");
+ }
+
+ void OnTestCaseStart(const TestCase& test_case) {
+ SendLn(std::string("event=TestCaseStart&name=") + test_case.name());
+ }
+
+ void OnTestCaseEnd(const TestCase& test_case) {
+ SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed())
+ + "&elapsed_time=" + StreamableToString(test_case.elapsed_time())
+ + "ms");
+ }
+
+ void OnTestStart(const TestInfo& test_info) {
+ SendLn(std::string("event=TestStart&name=") + test_info.name());
+ }
+
+ void OnTestEnd(const TestInfo& test_info) {
+ SendLn("event=TestEnd&passed=" +
+ FormatBool((test_info.result())->Passed()) +
+ "&elapsed_time=" +
+ StreamableToString((test_info.result())->elapsed_time()) + "ms");
+ }
+
+ void OnTestPartResult(const TestPartResult& test_part_result) {
+ const char* file_name = test_part_result.file_name();
+ if (file_name == NULL)
+ file_name = "";
+ SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
+ "&line=" + StreamableToString(test_part_result.line_number()) +
+ "&message=" + UrlEncode(test_part_result.message()));
+ }
+
+ private:
+ // Sends the given message and a newline to the socket.
+ void SendLn(const string& message) { socket_writer_->SendLn(message); }
+
+ // Called at the start of streaming to notify the receiver what
+ // protocol we are using.
+ void Start() { SendLn("gtest_streaming_protocol_version=1.0"); }
+
+ string FormatBool(bool value) { return value ? "1" : "0"; }
+
+ const scoped_ptr<AbstractSocketWriter> socket_writer_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
+}; // class StreamingListener
+
+#endif // GTEST_CAN_STREAM_RESULTS_
+
} // namespace internal
} // namespace testing
"Whether to use colors in the output. Valid values: yes, no, "
"and auto. 'auto' means to use colors if the output is "
"being sent to a terminal and the TERM environment variable "
- "is set to xterm, xterm-color, xterm-256color, linux or cygwin.");
+ "is set to a terminal type that supports colors.");
GTEST_DEFINE_string_(
filter,
const char* const colon = strchr(gtest_output_flag, ':');
if (colon == NULL)
return internal::FilePath::ConcatPaths(
- internal::FilePath(
- UnitTest::GetInstance()->original_working_dir()),
+ internal::FilePath(
+ UnitTest::GetInstance()->original_working_dir()),
internal::FilePath(kDefaultOutputFile)).string();
internal::FilePath output_name(colon + 1);
} // namespace internal
+// Constructs an empty Message.
+// We allocate the stringstream separately because otherwise each use of
+// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
+// stack frame leading to huge stack frames in some cases; gcc does not reuse
+// the stack space.
+Message::Message() : ss_(new ::std::stringstream) {
+ // By default, we want there to be enough precision when printing
+ // a double to a Message.
+ *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2);
+}
+
+// These two overloads allow streaming a wide C string to a Message
+// using the UTF-8 encoding.
+Message& Message::operator <<(const wchar_t* wide_c_str) {
+ return *this << internal::String::ShowWideCString(wide_c_str);
+}
+Message& Message::operator <<(wchar_t* wide_c_str) {
+ return *this << internal::String::ShowWideCString(wide_c_str);
+}
+
#if GTEST_HAS_STD_WSTRING
// Converts the given wide string to a narrow string using the UTF-8
// encoding, and streams the result to this Message object.
}
#endif // GTEST_HAS_GLOBAL_WSTRING
+// Gets the text streamed to this object so far as an std::string.
+// Each '\0' character in the buffer is replaced with "\\0".
+std::string Message::GetString() const {
+ return internal::StringStreamToString(ss_.get());
+}
+
// AssertionResult constructors.
// Used in EXPECT_TRUE/FALSE(assertion_result).
AssertionResult::AssertionResult(const AssertionResult& other)
// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
std::string GetBoolAssertionFailureMessage(
const AssertionResult& assertion_result,
- const char* expression_text,
- const char* actual_predicate_value,
- const char* expected_predicate_value) {
+ const char* expression_text,
+ const char* actual_predicate_value,
+ const char* expected_predicate_value) {
const char* actual_message = assertion_result.message();
Message msg;
msg << "Value of: " << expression_text
// want inserts expanded.
const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS;
- const DWORD kBufSize = 4096; // String::Format can't exceed this length.
+ const DWORD kBufSize = 4096;
// Gets the system's human readable message string for this HRESULT.
char error_text[kBufSize] = { '\0' };
DWORD message_length = ::FormatMessageA(kFlags,
error_text, // output buffer
kBufSize, // buf size
NULL); // no arguments for inserts
- // Trims tailing white space (FormatMessage leaves a trailing cr-lf)
+  // Trims trailing white space (FormatMessage leaves a trailing CR-LF)
for (; message_length && IsSpace(error_text[message_length - 1]);
--message_length) {
error_text[message_length - 1] = '\0';
# endif // GTEST_OS_WINDOWS_MOBILE
- const std::string error_hex(String::Format("0x%08X ", hr));
+ const std::string error_hex("0x" + String::FormatHexInt(hr));
return ::testing::AssertionFailure()
<< "Expected: " << expr << " " << expected << ".\n"
- << " Actual: " << error_hex << error_text << "\n";
+ << " Actual: " << error_hex << " " << error_text << "\n";
}
} // namespace
// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
-// The output buffer str must containt at least 32 characters.
-// The function returns the address of the output buffer.
// If the code_point is not a valid Unicode code point
-// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output
-// as '(Invalid Unicode 0xXXXXXXXX)'.
-char* CodePointToUtf8(UInt32 code_point, char* str) {
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+std::string CodePointToUtf8(UInt32 code_point) {
+ if (code_point > kMaxCodePoint4) {
+ return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
+ }
+
+ char str[5]; // Big enough for the largest valid code point.
if (code_point <= kMaxCodePoint1) {
str[1] = '\0';
str[0] = static_cast<char>(code_point); // 0xxxxxxx
str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
str[0] = static_cast<char>(0xE0 | code_point); // 1110xxxx
- } else if (code_point <= kMaxCodePoint4) {
+ } else { // code_point <= kMaxCodePoint4
str[4] = '\0';
str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
str[0] = static_cast<char>(0xF0 | code_point); // 11110xxx
- } else {
- // The longest string String::Format can produce when invoked
- // with these parameters is 28 character long (not including
- // the terminating nul character). We are asking for 32 character
- // buffer just in case. This is also enough for strncpy to
- // null-terminate the destination string.
- posix::StrNCpy(
- str, String::Format("(Invalid Unicode 0x%X)", code_point).c_str(), 32);
- str[31] = '\0'; // Makes sure no change in the format to strncpy leaves
- // the result unterminated.
}
return str;
}
unicode_code_point = static_cast<UInt32>(str[i]);
}
- char buffer[32]; // CodePointToUtf8 requires a buffer this big.
- stream << CodePointToUtf8(unicode_code_point, buffer);
+ stream << CodePointToUtf8(unicode_code_point);
}
return StringStreamToString(&stream);
}
suffix.c_str());
}
-// Formats a list of arguments to an std::string, using the same format
-// spec string as for printf.
-//
-// We do not use the StringPrintf class as it is not universally
-// available.
-//
-// The result is limited to 4096 characters (including the tailing 0).
-// If 4096 characters are not enough to format the input, or if
-// there's an error, "<formatting error or buffer exceeded>" is
-// returned.
-std::string String::Format(const char * format, ...) {
- va_list args;
- va_start(args, format);
-
- char buffer[4096];
- const int kBufferSize = sizeof(buffer)/sizeof(buffer[0]);
-
- // MSVC 8 deprecates vsnprintf(), so we want to suppress warning
- // 4996 (deprecated function) there.
-#ifdef _MSC_VER // We are using MSVC.
-# pragma warning(push) // Saves the current warning state.
-# pragma warning(disable:4996) // Temporarily disables warning 4996.
-
- const int size = vsnprintf(buffer, kBufferSize, format, args);
+// Formats an int value as "%02d".
+std::string String::FormatIntWidth2(int value) {
+ std::stringstream ss;
+ ss << std::setfill('0') << std::setw(2) << value;
+ return ss.str();
+}
-# pragma warning(pop) // Restores the warning state.
-#else // We are not using MSVC.
- const int size = vsnprintf(buffer, kBufferSize, format, args);
-#endif // _MSC_VER
- va_end(args);
+// Formats an int value as "%X".
+std::string String::FormatHexInt(int value) {
+ std::stringstream ss;
+ ss << std::hex << std::uppercase << value;
+ return ss.str();
+}
- // vsnprintf()'s behavior is not portable. When the buffer is not
- // big enough, it returns a negative value in MSVC, and returns the
- // needed buffer size on Linux. When there is an output error, it
- // always returns a negative value. For simplicity, we lump the two
- // error cases together.
- if (size < 0 || size >= kBufferSize) {
- return "<formatting error or buffer exceeded>";
- } else {
- return std::string(buffer, size);
- }
+// Formats a byte as "%02X".
+std::string String::FormatByte(unsigned char value) {
+ std::stringstream ss;
+ ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase
+ << static_cast<unsigned int>(value);
+ return ss.str();
}
// Converts the buffer in a stringstream to an std::string, converting NUL
// Appends the user-supplied message to the Google-Test-generated message.
std::string AppendUserMessage(const std::string& gtest_msg,
- const Message& user_msg) {
+ const Message& user_msg) {
// Appends the user message if it's non-empty.
const std::string user_msg_string = user_msg.GetString();
if (user_msg_string.empty()) {
// Adds a test property to the list. If a property with the same key as the
// supplied property is already represented, the value of this test_property
// replaces the old value for that key.
-void TestResult::RecordProperty(const TestProperty& test_property) {
- if (!ValidateTestProperty(test_property)) {
+void TestResult::RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
+ if (!ValidateTestProperty(xml_element, test_property)) {
return;
}
internal::MutexLock lock(&test_properites_mutex_);
property_with_matching_key->SetValue(test_property.value());
}
-// Adds a failure if the key is a reserved attribute of Google Test
-// testcase tags. Returns true if the property is valid.
-bool TestResult::ValidateTestProperty(const TestProperty& test_property) {
- const std::string& key = test_property.key();
- if (key == "name" || key == "status" || key == "time" || key == "classname") {
- ADD_FAILURE()
- << "Reserved key used in RecordProperty(): "
- << key
- << " ('name', 'status', 'time', and 'classname' are reserved by "
- << GTEST_NAME_ << ")";
+// The list of reserved attributes used in the <testsuites> element of XML
+// output.
+static const char* const kReservedTestSuitesAttributes[] = {
+ "disabled",
+ "errors",
+ "failures",
+ "name",
+ "random_seed",
+ "tests",
+ "time",
+ "timestamp"
+};
+
+// The list of reserved attributes used in the <testsuite> element of XML
+// output.
+static const char* const kReservedTestSuiteAttributes[] = {
+ "disabled",
+ "errors",
+ "failures",
+ "name",
+ "tests",
+ "time"
+};
+
+// The list of reserved attributes used in the <testcase> element of XML output.
+static const char* const kReservedTestCaseAttributes[] = {
+ "classname",
+ "name",
+ "status",
+ "time",
+ "type_param",
+ "value_param"
+};
+
+template <int kSize>
+std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
+ return std::vector<std::string>(array, array + kSize);
+}
+
+static std::vector<std::string> GetReservedAttributesForElement(
+ const std::string& xml_element) {
+ if (xml_element == "testsuites") {
+ return ArrayAsVector(kReservedTestSuitesAttributes);
+ } else if (xml_element == "testsuite") {
+ return ArrayAsVector(kReservedTestSuiteAttributes);
+ } else if (xml_element == "testcase") {
+ return ArrayAsVector(kReservedTestCaseAttributes);
+ } else {
+ GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
+ }
+  // This code is unreachable but some compilers may not realize that.
+ return std::vector<std::string>();
+}
+
+static std::string FormatWordList(const std::vector<std::string>& words) {
+ Message word_list;
+ for (size_t i = 0; i < words.size(); ++i) {
+ if (i > 0 && words.size() > 2) {
+ word_list << ", ";
+ }
+ if (i == words.size() - 1) {
+ word_list << "and ";
+ }
+ word_list << "'" << words[i] << "'";
+ }
+ return word_list.GetString();
+}
+
+static bool ValidateTestPropertyName(const std::string& property_name,
+ const std::vector<std::string>& reserved_names) {
+ if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
+ reserved_names.end()) {
+ ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
+ << " (" << FormatWordList(reserved_names)
+ << " are reserved by " << GTEST_NAME_ << ")";
return false;
}
return true;
}
+// Adds a failure if the key is a reserved attribute of the element named
+// xml_element. Returns true if the property is valid.
+bool TestResult::ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
+ return ValidateTestPropertyName(test_property.key(),
+ GetReservedAttributesForElement(xml_element));
+}
+
// Clears the object.
void TestResult::Clear() {
test_part_results_.clear();
}
// Allows user supplied key value pairs to be recorded for later output.
-void Test::RecordProperty(const char* key, const char* value) {
- UnitTest::GetInstance()->RecordPropertyForCurrentTest(key, value);
+void Test::RecordProperty(const std::string& key, const std::string& value) {
+ UnitTest::GetInstance()->RecordProperty(key, value);
}
// Allows user supplied key value pairs to be recorded for later output.
-void Test::RecordProperty(const char* key, int value) {
+void Test::RecordProperty(const std::string& key, int value) {
Message value_message;
value_message << value;
RecordProperty(key, value_message.GetString().c_str());
// prohibits creation of objects with destructors on stack in functions
// using __try (see error C2712).
static std::string* FormatSehExceptionMessage(DWORD exception_code,
- const char* location) {
+ const char* location) {
Message message;
message << "SEH exception with code 0x" << std::setbase(16) <<
exception_code << std::setbase(10) << " thrown in " << location << ".";
#endif // GTEST_HAS_SEH
+namespace internal {
+
#if GTEST_HAS_EXCEPTIONS
// Adds an "exception thrown" fatal failure to the current test.
static std::string FormatCxxExceptionMessage(const char* description,
- const char* location) {
+ const char* location) {
Message message;
if (description != NULL) {
message << "C++ exception with description \"" << description << "\"";
static std::string PrintTestPartResultToString(
const TestPartResult& test_part_result);
-// A failed Google Test assertion will throw an exception of this type when
-// GTEST_FLAG(throw_on_failure) is true (if exceptions are enabled). We
-// derive it from std::runtime_error, which is for errors presumably
-// detectable only at run time. Since std::runtime_error inherits from
-// std::exception, many testing frameworks know how to extract and print the
-// message inside it.
-class GoogleTestFailureException : public ::std::runtime_error {
- public:
- explicit GoogleTestFailureException(const TestPartResult& failure)
+GoogleTestFailureException::GoogleTestFailureException(
+ const TestPartResult& failure)
: ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}
-};
+
#endif // GTEST_HAS_EXCEPTIONS
-namespace internal {
// We put these helper functions in the internal namespace as IBM's xlC
// compiler rejects the code if they were declared static.
#if GTEST_HAS_EXCEPTIONS
try {
return HandleSehExceptionsInMethodIfSupported(object, method, location);
- } catch (const GoogleTestFailureException&) { // NOLINT
- // This exception doesn't originate in code under test. It makes no
- // sense to report it as a test failure.
+ } catch (const internal::GoogleTestFailureException&) { // NOLINT
+ // This exception type can only be thrown by a failed Google
+ // Test assertion with the intention of letting another testing
+ // framework catch it. Therefore we just re-throw it.
throw;
} catch (const std::exception& e) { // NOLINT
internal::ReportFailureInUnknownLocation(
// Constructs a TestInfo object. It assumes ownership of the test factory
// object.
-// TODO(vladl@google.com): Make a_test_case_name and a_name const string&'s
-// to signify they cannot be NULLs.
-TestInfo::TestInfo(const char* a_test_case_name,
- const char* a_name,
+TestInfo::TestInfo(const std::string& a_test_case_name,
+ const std::string& a_name,
const char* a_type_param,
const char* a_value_param,
internal::TypeId fixture_class_id,
// The newly created TestInfo instance will assume
// ownership of the factory object.
TestInfo* MakeAndRegisterTestInfo(
- const char* test_case_name, const char* name,
+ const char* test_case_name,
+ const char* name,
const char* type_param,
const char* value_param,
TypeId fixture_class_id,
// Clears the results of all tests in this test case.
void TestCase::ClearResult() {
+ ad_hoc_test_result_.Clear();
ForEach(test_info_list_, TestInfo::ClearTestResult);
}
// FormatCountableNoun(1, "formula", "formuli") returns "1 formula".
// FormatCountableNoun(5, "book", "books") returns "5 books".
static std::string FormatCountableNoun(int count,
- const char * singular_form,
- const char * plural_form) {
- return internal::String::Format("%d %s", count,
- count == 1 ? singular_form : plural_form);
+ const char * singular_form,
+ const char * plural_form) {
+ return internal::StreamableToString(count) + " " +
+ (count == 1 ? singular_form : plural_form);
}
// Formats the count of tests.
}
}
+namespace internal {
+
// Prints a TestPartResult to an std::string.
static std::string PrintTestPartResultToString(
const TestPartResult& test_part_result) {
// class PrettyUnitTestResultPrinter
-namespace internal {
-
enum GTestColor {
COLOR_DEFAULT,
COLOR_RED,
String::CStringEquals(term, "xterm-color") ||
String::CStringEquals(term, "xterm-256color") ||
String::CStringEquals(term, "screen") ||
+ String::CStringEquals(term, "screen-256color") ||
String::CStringEquals(term, "linux") ||
String::CStringEquals(term, "cygwin");
return stdout_is_tty && term_supports_color;
va_end(args);
}
+// Text printed in Google Test's text output and --gtest_list_tests
+// output to label the type parameter and value parameter for a test.
+static const char kTypeParamLabel[] = "TypeParam";
+static const char kValueParamLabel[] = "GetParam()";
+
static void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
const char* const type_param = test_info.type_param();
const char* const value_param = test_info.value_param();
if (type_param != NULL || value_param != NULL) {
printf(", where ");
if (type_param != NULL) {
- printf("TypeParam = %s", type_param);
+ printf("%s = %s", kTypeParamLabel, type_param);
if (value_param != NULL)
printf(" and ");
}
if (value_param != NULL) {
- printf("GetParam() = %s", value_param);
+ printf("%s = %s", kValueParamLabel, value_param);
}
}
}
if (test_case.type_param() == NULL) {
printf("\n");
} else {
- printf(", where TypeParam = %s\n", test_case.type_param());
+ printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
}
fflush(stdout);
}
// is_attribute is true, the text is meant to appear as an attribute
// value, and normalizable whitespace is preserved by replacing it
// with character references.
- static std::string EscapeXml(const char* str, bool is_attribute);
+ static std::string EscapeXml(const std::string& str, bool is_attribute);
// Returns the given string with all characters invalid in XML removed.
- static string RemoveInvalidXmlCharacters(const string& str);
+ static std::string RemoveInvalidXmlCharacters(const std::string& str);
// Convenience wrapper around EscapeXml when str is an attribute value.
- static std::string EscapeXmlAttribute(const char* str) {
+ static std::string EscapeXmlAttribute(const std::string& str) {
return EscapeXml(str, true);
}
return EscapeXml(str, false);
}
+ // Verifies that the given attribute belongs to the given element and
+ // streams the attribute as XML.
+ static void OutputXmlAttribute(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value);
+
// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
static void OutputXmlCDataSection(::std::ostream* stream, const char* data);
const TestInfo& test_info);
// Prints an XML representation of a TestCase object
- static void PrintXmlTestCase(FILE* out, const TestCase& test_case);
+ static void PrintXmlTestCase(::std::ostream* stream,
+ const TestCase& test_case);
// Prints an XML summary of unit_test to output stream out.
- static void PrintXmlUnitTest(FILE* out, const UnitTest& unit_test);
+ static void PrintXmlUnitTest(::std::ostream* stream,
+ const UnitTest& unit_test);
// Produces a string representing the test properties in a result as space
// delimited XML attributes based on the property key="value" pairs.
fflush(stderr);
exit(EXIT_FAILURE);
}
- PrintXmlUnitTest(xmlout, unit_test);
+ std::stringstream stream;
+ PrintXmlUnitTest(&stream, unit_test);
+ fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
fclose(xmlout);
}
// TODO(wan): It might be nice to have a minimally invasive, human-readable
// escaping scheme for invalid characters, rather than dropping them.
std::string XmlUnitTestResultPrinter::EscapeXml(
- const char* str, bool is_attribute) {
+ const std::string& str, bool is_attribute) {
Message m;
- if (str != NULL) {
- for (const char* src = str; *src; ++src) {
- switch (*src) {
+ for (size_t i = 0; i < str.size(); ++i) {
+ const char ch = str[i];
+ switch (ch) {
case '<':
m << "<";
break;
m << '"';
break;
default:
- if (IsValidXmlCharacter(*src)) {
- if (is_attribute && IsNormalizableWhitespace(*src))
- m << String::Format("&#x%02X;", unsigned(*src));
+ if (IsValidXmlCharacter(ch)) {
+ if (is_attribute && IsNormalizableWhitespace(ch))
+ m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
+ << ";";
else
- m << *src;
+ m << ch;
}
break;
}
}
- }
return m.GetString();
}
// Returns the given string with all characters invalid in XML removed.
// Currently invalid characters are dropped from the string. An
// alternative is to replace them with certain characters such as . or ?.
-string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(const string& str) {
- string output;
+std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
+ const std::string& str) {
+ std::string output;
output.reserve(str.size());
- for (string::const_iterator it = str.begin(); it != str.end(); ++it)
+ for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
if (IsValidXmlCharacter(*it))
output.push_back(*it);
if (time_struct == NULL)
return ""; // Invalid ms value
- return String::Format("%d-%02d-%02dT%02d:%02d:%02d", // YYYY-MM-DDThh:mm:ss
- time_struct->tm_year + 1900,
- time_struct->tm_mon + 1,
- time_struct->tm_mday,
- time_struct->tm_hour,
- time_struct->tm_min,
- time_struct->tm_sec);
+ // YYYY-MM-DDThh:mm:ss
+ return StreamableToString(time_struct->tm_year + 1900) + "-" +
+ String::FormatIntWidth2(time_struct->tm_mon + 1) + "-" +
+ String::FormatIntWidth2(time_struct->tm_mday) + "T" +
+ String::FormatIntWidth2(time_struct->tm_hour) + ":" +
+ String::FormatIntWidth2(time_struct->tm_min) + ":" +
+ String::FormatIntWidth2(time_struct->tm_sec);
}
// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
*stream << "]]>";
}
+void XmlUnitTestResultPrinter::OutputXmlAttribute(
+ std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value) {
+ const std::vector<std::string>& allowed_names =
+ GetReservedAttributesForElement(element_name);
+
+ GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+ allowed_names.end())
+ << "Attribute " << name << " is not allowed for element <" << element_name
+ << ">.";
+
+ *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
+}
+
// Prints an XML representation of a TestInfo object.
// TODO(wan): There is also value in printing properties with the plain printer.
void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
const char* test_case_name,
const TestInfo& test_info) {
const TestResult& result = *test_info.result();
- *stream << " <testcase name=\""
- << EscapeXmlAttribute(test_info.name()).c_str() << "\"";
+ const std::string kTestcase = "testcase";
+
+ *stream << " <testcase";
+ OutputXmlAttribute(stream, kTestcase, "name", test_info.name());
if (test_info.value_param() != NULL) {
- *stream << " value_param=\"" << EscapeXmlAttribute(test_info.value_param())
- << "\"";
+ OutputXmlAttribute(stream, kTestcase, "value_param",
+ test_info.value_param());
}
if (test_info.type_param() != NULL) {
- *stream << " type_param=\"" << EscapeXmlAttribute(test_info.type_param())
- << "\"";
+ OutputXmlAttribute(stream, kTestcase, "type_param", test_info.type_param());
}
- *stream << " status=\""
- << (test_info.should_run() ? "run" : "notrun")
- << "\" time=\""
- << FormatTimeInMillisAsSeconds(result.elapsed_time())
- << "\" classname=\"" << EscapeXmlAttribute(test_case_name).c_str()
- << "\"" << TestPropertiesAsXmlAttributes(result).c_str();
+ OutputXmlAttribute(stream, kTestcase, "status",
+ test_info.should_run() ? "run" : "notrun");
+ OutputXmlAttribute(stream, kTestcase, "time",
+ FormatTimeInMillisAsSeconds(result.elapsed_time()));
+ OutputXmlAttribute(stream, kTestcase, "classname", test_case_name);
+ *stream << TestPropertiesAsXmlAttributes(result);
int failures = 0;
for (int i = 0; i < result.total_part_count(); ++i) {
}
// Prints an XML representation of a TestCase object
-void XmlUnitTestResultPrinter::PrintXmlTestCase(FILE* out,
+void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream,
const TestCase& test_case) {
- fprintf(out,
- " <testsuite name=\"%s\" tests=\"%d\" failures=\"%d\" "
- "disabled=\"%d\" ",
- EscapeXmlAttribute(test_case.name()).c_str(),
- test_case.total_test_count(),
- test_case.failed_test_count(),
- test_case.disabled_test_count());
- fprintf(out,
- "errors=\"0\" time=\"%s\">\n",
- FormatTimeInMillisAsSeconds(test_case.elapsed_time()).c_str());
- for (int i = 0; i < test_case.total_test_count(); ++i) {
- ::std::stringstream stream;
- OutputXmlTestInfo(&stream, test_case.name(), *test_case.GetTestInfo(i));
- fprintf(out, "%s", StringStreamToString(&stream).c_str());
- }
- fprintf(out, " </testsuite>\n");
+ const std::string kTestsuite = "testsuite";
+ *stream << " <" << kTestsuite;
+ OutputXmlAttribute(stream, kTestsuite, "name", test_case.name());
+ OutputXmlAttribute(stream, kTestsuite, "tests",
+ StreamableToString(test_case.total_test_count()));
+ OutputXmlAttribute(stream, kTestsuite, "failures",
+ StreamableToString(test_case.failed_test_count()));
+ OutputXmlAttribute(stream, kTestsuite, "disabled",
+ StreamableToString(test_case.disabled_test_count()));
+ OutputXmlAttribute(stream, kTestsuite, "errors", "0");
+ OutputXmlAttribute(stream, kTestsuite, "time",
+ FormatTimeInMillisAsSeconds(test_case.elapsed_time()));
+ *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result())
+ << ">\n";
+
+ for (int i = 0; i < test_case.total_test_count(); ++i)
+ OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i));
+ *stream << " </" << kTestsuite << ">\n";
}
// Prints an XML summary of unit_test to output stream out.
-void XmlUnitTestResultPrinter::PrintXmlUnitTest(FILE* out,
+void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
const UnitTest& unit_test) {
- fprintf(out, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
- fprintf(out,
- "<testsuites tests=\"%d\" failures=\"%d\" disabled=\"%d\" "
- "errors=\"0\" timestamp=\"%s\" time=\"%s\" ",
- unit_test.total_test_count(),
- unit_test.failed_test_count(),
- unit_test.disabled_test_count(),
- FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()).c_str(),
- FormatTimeInMillisAsSeconds(unit_test.elapsed_time()).c_str());
+ const std::string kTestsuites = "testsuites";
+
+ *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+ *stream << "<" << kTestsuites;
+
+ OutputXmlAttribute(stream, kTestsuites, "tests",
+ StreamableToString(unit_test.total_test_count()));
+ OutputXmlAttribute(stream, kTestsuites, "failures",
+ StreamableToString(unit_test.failed_test_count()));
+ OutputXmlAttribute(stream, kTestsuites, "disabled",
+ StreamableToString(unit_test.disabled_test_count()));
+ OutputXmlAttribute(stream, kTestsuites, "errors", "0");
+ OutputXmlAttribute(
+ stream, kTestsuites, "timestamp",
+ FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
+ OutputXmlAttribute(stream, kTestsuites, "time",
+ FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
+
if (GTEST_FLAG(shuffle)) {
- fprintf(out, "random_seed=\"%d\" ", unit_test.random_seed());
+ OutputXmlAttribute(stream, kTestsuites, "random_seed",
+ StreamableToString(unit_test.random_seed()));
}
- fprintf(out, "name=\"AllTests\">\n");
- for (int i = 0; i < unit_test.total_test_case_count(); ++i)
- PrintXmlTestCase(out, *unit_test.GetTestCase(i));
- fprintf(out, "</testsuites>\n");
+
+ *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());
+
+ OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
+ *stream << ">\n";
+
+
+ for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
+ PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
+ }
+ *stream << "</" << kTestsuites << ">\n";
}
// Produces a string representing the test properties in a result as space
#if GTEST_CAN_STREAM_RESULTS_
-// Streams test results to the given port on the given host machine.
-class StreamingListener : public EmptyTestEventListener {
- public:
- // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
- static string UrlEncode(const char* str);
-
- StreamingListener(const string& host, const string& port)
- : sockfd_(-1), host_name_(host), port_num_(port) {
- MakeConnection();
- Send("gtest_streaming_protocol_version=1.0\n");
- }
-
- virtual ~StreamingListener() {
- if (sockfd_ != -1)
- CloseConnection();
- }
-
- void OnTestProgramStart(const UnitTest& /* unit_test */) {
- Send("event=TestProgramStart\n");
- }
-
- void OnTestProgramEnd(const UnitTest& unit_test) {
- // Note that Google Test current only report elapsed time for each
- // test iteration, not for the entire test program.
- Send(String::Format("event=TestProgramEnd&passed=%d\n",
- unit_test.Passed()));
-
- // Notify the streaming server to stop.
- CloseConnection();
- }
-
- void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
- Send(String::Format("event=TestIterationStart&iteration=%d\n",
- iteration));
- }
-
- void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
- Send(String::Format("event=TestIterationEnd&passed=%d&elapsed_time=%sms\n",
- unit_test.Passed(),
- StreamableToString(unit_test.elapsed_time()).c_str()));
- }
-
- void OnTestCaseStart(const TestCase& test_case) {
- Send(String::Format("event=TestCaseStart&name=%s\n", test_case.name()));
- }
-
- void OnTestCaseEnd(const TestCase& test_case) {
- Send(String::Format("event=TestCaseEnd&passed=%d&elapsed_time=%sms\n",
- test_case.Passed(),
- StreamableToString(test_case.elapsed_time()).c_str()));
- }
-
- void OnTestStart(const TestInfo& test_info) {
- Send(String::Format("event=TestStart&name=%s\n", test_info.name()));
- }
-
- void OnTestEnd(const TestInfo& test_info) {
- Send(String::Format(
- "event=TestEnd&passed=%d&elapsed_time=%sms\n",
- (test_info.result())->Passed(),
- StreamableToString((test_info.result())->elapsed_time()).c_str()));
- }
-
- void OnTestPartResult(const TestPartResult& test_part_result) {
- const char* file_name = test_part_result.file_name();
- if (file_name == NULL)
- file_name = "";
- Send(String::Format("event=TestPartResult&file=%s&line=%d&message=",
- UrlEncode(file_name).c_str(),
- test_part_result.line_number()));
- Send(UrlEncode(test_part_result.message()) + "\n");
- }
-
- private:
- // Creates a client socket and connects to the server.
- void MakeConnection();
-
- // Closes the socket.
- void CloseConnection() {
- GTEST_CHECK_(sockfd_ != -1)
- << "CloseConnection() can be called only when there is a connection.";
-
- close(sockfd_);
- sockfd_ = -1;
- }
-
- // Sends a string to the socket.
- void Send(const string& message) {
- GTEST_CHECK_(sockfd_ != -1)
- << "Send() can be called only when there is a connection.";
-
- const int len = static_cast<int>(message.length());
- if (write(sockfd_, message.c_str(), len) != len) {
- GTEST_LOG_(WARNING)
- << "stream_result_to: failed to stream to "
- << host_name_ << ":" << port_num_;
- }
- }
-
- int sockfd_; // socket file descriptor
- const string host_name_;
- const string port_num_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
-}; // class StreamingListener
-
// Checks if str contains '=', '&', '%' or '\n' characters. If yes,
// replaces them by "%xx" where xx is their hexadecimal value. For
// example, replaces "=" with "%3D". This algorithm is O(strlen(str))
case '=':
case '&':
case '\n':
- result.append(String::Format("%%%02x", static_cast<unsigned char>(ch)));
+ result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
break;
default:
result.push_back(ch);
return result;
}
-void StreamingListener::MakeConnection() {
+void StreamingListener::SocketWriter::MakeConnection() {
GTEST_CHECK_(sockfd_ == -1)
<< "MakeConnection() can't be called when there is already a connection.";
return impl()->GetTestCase(i);
}
+// Returns the TestResult containing information on test failures and
+// properties logged outside of individual test cases.
+const TestResult& UnitTest::ad_hoc_test_result() const {
+ return *impl()->ad_hoc_test_result();
+}
+
// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
TestCase* UnitTest::GetMutableTestCase(int i) {
// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
// this to report their results. The user code should use the
// assertion macros instead of calling this directly.
+GTEST_LOCK_EXCLUDED_(mutex_)
void UnitTest::AddTestPartResult(
TestPartResult::Type result_type,
const char* file_name,
int line_number,
const std::string& message,
- const std::string& os_stack_trace)
- GTEST_LOCK_EXCLUDED_(mutex_) {
+ const std::string& os_stack_trace) {
Message msg;
msg << message;
#endif // GTEST_OS_WINDOWS
} else if (GTEST_FLAG(throw_on_failure)) {
#if GTEST_HAS_EXCEPTIONS
- throw GoogleTestFailureException(result);
+ throw internal::GoogleTestFailureException(result);
#else
// We cannot call abort() as it generates a pop-up in debug mode
// that cannot be suppressed in VC 7.1 or below.
}
}
-// Creates and adds a property to the current TestResult. If a property matching
-// the supplied value already exists, updates its value instead.
-void UnitTest::RecordPropertyForCurrentTest(const char* key,
- const char* value) {
- const TestProperty test_property(key, value);
- impl_->current_test_result()->RecordProperty(test_property);
+// Adds a TestProperty to the current TestResult object when invoked from
+// inside a test, to current TestCase's ad_hoc_test_result_ when invoked
+// from SetUpTestCase or TearDownTestCase, or to the global property set
+// when invoked elsewhere. If the result already contains a property with
+// the same key, the value will be updated.
+void UnitTest::RecordProperty(const std::string& key,
+ const std::string& value) {
+ impl_->RecordProperty(TestProperty(key, value));
}
// Runs all tests in this UnitTest object and prints the result.
delete os_stack_trace_getter_;
}
+// Adds a TestProperty to the current TestResult object when invoked in a
+// context of a test, to current test case's ad_hoc_test_result when invoked
+// from SetUpTestCase/TearDownTestCase, or to the global property set
+// otherwise. If the result already contains a property with the same key,
+// the value will be updated.
+void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
+ std::string xml_element;
+ TestResult* test_result; // TestResult appropriate for property recording.
+
+ if (current_test_info_ != NULL) {
+ xml_element = "testcase";
+ test_result = &(current_test_info_->result_);
+ } else if (current_test_case_ != NULL) {
+ xml_element = "testsuite";
+ test_result = &(current_test_case_->ad_hoc_test_result_);
+ } else {
+ xml_element = "testsuites";
+ test_result = &ad_hoc_test_result_;
+ }
+ test_result->RecordProperty(xml_element, test_property);
+}
+
#if GTEST_HAS_DEATH_TEST
// Disables event forwarding if the control is currently in a death test
// subprocess. Must not be called before InitGoogleTest.
return num_selected_tests;
}
+// Prints the given C-string on a single line by replacing all '\n'
+// characters with string "\\n". If the output takes more than
+// max_length characters, only prints the first max_length characters
+// and "...".
+static void PrintOnOneLine(const char* str, int max_length) {
+ if (str != NULL) {
+ for (int i = 0; *str != '\0'; ++str) {
+ if (i >= max_length) {
+ printf("...");
+ break;
+ }
+ if (*str == '\n') {
+ printf("\\n");
+ i += 2;
+ } else {
+ printf("%c", *str);
+ ++i;
+ }
+ }
+ }
+}
+
// Prints the names of the tests matching the user-specified filter flag.
void UnitTestImpl::ListTestsMatchingFilter() {
+ // Print at most this many characters for each type/value parameter.
+ const int kMaxParamLength = 250;
+
for (size_t i = 0; i < test_cases_.size(); i++) {
const TestCase* const test_case = test_cases_[i];
bool printed_test_case_name = false;
if (test_info->matches_filter_) {
if (!printed_test_case_name) {
printed_test_case_name = true;
- printf("%s.\n", test_case->name());
+ printf("%s.", test_case->name());
+ if (test_case->type_param() != NULL) {
+ printf(" # %s = ", kTypeParamLabel);
+ // We print the type parameter on a single line to make
+ // the output easy to parse by a program.
+ PrintOnOneLine(test_case->type_param(), kMaxParamLength);
+ }
+ printf("\n");
+ }
+ printf(" %s", test_info->name());
+ if (test_info->value_param() != NULL) {
+ printf(" # %s = ", kValueParamLabel);
+ // We print the value parameter on a single line to make the
+ // output easy to parse by a program.
+ PrintOnOneLine(test_info->value_param(), kMaxParamLength);
}
- printf(" %s\n", test_info->name());
+ printf("\n");
}
}
}
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/,
- int skip_count) {
+ int skip_count) {
// We pass skip_count + 1 to skip this wrapper function in addition
// to what the user really wants to skip.
return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1);
# define GTEST_DEATH_TEST_CHECK_(expression) \
do { \
if (!::testing::internal::IsTrue(expression)) { \
- DeathTestAbort(::testing::internal::String::Format( \
- "CHECK failed: File %s, line %d: %s", \
- __FILE__, __LINE__, #expression)); \
+ DeathTestAbort( \
+ ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
+ + ::testing::internal::StreamableToString(__LINE__) + ": " \
+ + #expression); \
} \
} while (::testing::internal::AlwaysFalse())
gtest_retval = (expression); \
} while (gtest_retval == -1 && errno == EINTR); \
if (gtest_retval == -1) { \
- DeathTestAbort(::testing::internal::String::Format( \
- "CHECK failed: File %s, line %d: %s != -1", \
- __FILE__, __LINE__, #expression)); \
+ DeathTestAbort( \
+ ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
+ + ::testing::internal::StreamableToString(__LINE__) + ": " \
+ + #expression + " != -1"); \
} \
} while (::testing::internal::AlwaysFalse())
info->test_case_name() + "." + info->name();
const std::string internal_flag =
std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag +
- "=" + file_ + "|" + String::Format("%d|%d|%u|%Iu|%Iu", line_,
- death_test_index,
- static_cast<unsigned int>(::GetCurrentProcessId()),
- // size_t has the same with as pointers on both 32-bit and 64-bit
+ "=" + file_ + "|" + StreamableToString(line_) + "|" +
+ StreamableToString(death_test_index) + "|" +
+ StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
+ // size_t has the same width as pointers on both 32-bit and 64-bit
// Windows platforms.
// See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
- reinterpret_cast<size_t>(write_handle),
- reinterpret_cast<size_t>(event_handle_.Get()));
+ "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) +
+ "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));
char executable_path[_MAX_PATH + 1]; // NOLINT
GTEST_DEATH_TEST_CHECK_(
GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);
const std::string filter_flag =
- String::Format("--%s%s=%s.%s",
- GTEST_FLAG_PREFIX_, kFilterFlag,
- info->test_case_name(), info->name());
+ std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "="
+ + info->test_case_name() + "." + info->name();
const std::string internal_flag =
- String::Format("--%s%s=%s|%d|%d|%d",
- GTEST_FLAG_PREFIX_, kInternalRunDeathTestFlag,
- file_, line_, death_test_index, pipe_fd[1]);
+ std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "="
+ + file_ + "|" + StreamableToString(line_) + "|"
+ + StreamableToString(death_test_index) + "|"
+ + StreamableToString(pipe_fd[1]);
Arguments args;
args.AddArguments(GetArgvsForDeathTestChildProcess());
args.AddArgument(filter_flag.c_str());
if (flag != NULL) {
if (death_test_index > flag->index()) {
- DeathTest::set_last_death_test_message(String::Format(
- "Death test count (%d) somehow exceeded expected maximum (%d)",
- death_test_index, flag->index()));
+ DeathTest::set_last_death_test_message(
+ "Death test count (" + StreamableToString(death_test_index)
+ + ") somehow exceeded expected maximum ("
+ + StreamableToString(flag->index()) + ")");
return false;
}
# endif // GTEST_OS_WINDOWS
else { // NOLINT - this is more readable than unbalanced brackets inside #if.
- DeathTest::set_last_death_test_message(String::Format(
- "Unknown death test style \"%s\" encountered",
- GTEST_FLAG(death_test_style).c_str()));
+ DeathTest::set_last_death_test_message(
+ "Unknown death test style \"" + GTEST_FLAG(death_test_style)
+ + "\" encountered");
return false;
}
FALSE, // Non-inheritable.
parent_process_id));
if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) {
- DeathTestAbort(String::Format("Unable to open parent process %u",
- parent_process_id));
+ DeathTestAbort("Unable to open parent process " +
+ StreamableToString(parent_process_id));
}
// TODO(vladl@google.com): Replace the following check with a
// DUPLICATE_SAME_ACCESS is used.
FALSE, // Request non-inheritable handler.
DUPLICATE_SAME_ACCESS)) {
- DeathTestAbort(String::Format(
- "Unable to duplicate the pipe handle %Iu from the parent process %u",
- write_handle_as_size_t, parent_process_id));
+ DeathTestAbort("Unable to duplicate the pipe handle " +
+ StreamableToString(write_handle_as_size_t) +
+ " from the parent process " +
+ StreamableToString(parent_process_id));
}
const HANDLE event_handle = reinterpret_cast<HANDLE>(event_handle_as_size_t);
0x0,
FALSE,
DUPLICATE_SAME_ACCESS)) {
- DeathTestAbort(String::Format(
- "Unable to duplicate the event handle %Iu from the parent process %u",
- event_handle_as_size_t, parent_process_id));
+ DeathTestAbort("Unable to duplicate the event handle " +
+ StreamableToString(event_handle_as_size_t) +
+ " from the parent process " +
+ StreamableToString(parent_process_id));
}
const int write_fd =
::_open_osfhandle(reinterpret_cast<intptr_t>(dup_write_handle), O_APPEND);
if (write_fd == -1) {
- DeathTestAbort(String::Format(
- "Unable to convert pipe handle %Iu to a file descriptor",
- write_handle_as_size_t));
+ DeathTestAbort("Unable to convert pipe handle " +
+ StreamableToString(write_handle_as_size_t) +
+ " to a file descriptor");
}
// Signals the parent that the write end of the pipe has been acquired
|| !ParseNaturalNumber(fields[3], &parent_process_id)
|| !ParseNaturalNumber(fields[4], &write_handle_as_size_t)
|| !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) {
- DeathTestAbort(String::Format(
- "Bad --gtest_internal_run_death_test flag: %s",
- GTEST_FLAG(internal_run_death_test).c_str()));
+ DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
+ GTEST_FLAG(internal_run_death_test));
}
write_fd = GetStatusFileDescriptor(parent_process_id,
write_handle_as_size_t,
|| !ParseNaturalNumber(fields[1], &line)
|| !ParseNaturalNumber(fields[2], &index)
|| !ParseNaturalNumber(fields[3], &write_fd)) {
- DeathTestAbort(String::Format(
- "Bad --gtest_internal_run_death_test flag: %s",
- GTEST_FLAG(internal_run_death_test).c_str()));
+ DeathTestAbort("Bad --gtest_internal_run_death_test flag: "
+ + GTEST_FLAG(internal_run_death_test));
}
# endif // GTEST_OS_WINDOWS
if (number == 0) {
file = base_name.string() + "." + extension;
} else {
- file = base_name.string() + "_" + String::Format("%d", number).c_str()
+ file = base_name.string() + "_" + StreamableToString(number)
+ "." + extension;
}
return ConcatPaths(directory, FilePath(file));
// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code.
GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
- const char* const file_name = file == NULL ? kUnknownFile : file;
+ const std::string file_name(file == NULL ? kUnknownFile : file);
if (line < 0) {
- return String::Format("%s:", file_name).c_str();
+ return file_name + ":";
}
#ifdef _MSC_VER
- return String::Format("%s(%d):", file_name, line).c_str();
+ return file_name + "(" + StreamableToString(line) + "):";
#else
- return String::Format("%s:%d:", file_name, line).c_str();
+ return file_name + ":" + StreamableToString(line) + ":";
#endif // _MSC_VER
}
// to the file location it produces, unlike FormatFileLocation().
GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
const char* file, int line) {
- const char* const file_name = file == NULL ? kUnknownFile : file;
+ const std::string file_name(file == NULL ? kUnknownFile : file);
if (line < 0)
return file_name;
else
- return String::Format("%s:%d", file_name, line).c_str();
+ return file_name + ":" + StreamableToString(line);
}
*os << static_cast<char>(c);
return kAsIs;
} else {
- *os << String::Format("\\x%X", static_cast<UnsignedChar>(c));
+ *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
return kHexEscape;
}
}
// obvious).
if (c == 0)
return;
- *os << " (" << String::Format("%d", c).c_str();
+ *os << " (" << static_cast<int>(c);
// For more convenience, we print c's code again in hexidecimal,
// unless c was already printed in the form '\x##' or the code is in
if (format == kHexEscape || (1 <= c && c <= 9)) {
// Do nothing.
} else {
- *os << String::Format(", 0x%X",
- static_cast<UnsignedChar>(c)).c_str();
+ *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
}
*os << ")";
}
testing::Values<std::string>("cv/optflow/frames/720p_01.png"),
testing::Values(7, 11),
testing::Bool(),
- testing::ValuesIn(PyrBorderMode::all()),
+ PyrBorderMode::all(),
testing::Bool()
)
)
+++ /dev/null
-<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
- xmlns:tools="http://schemas.android.com/tools"
- android:layout_width="match_parent"
- android:layout_height="match_parent" >
-
- <org.opencv.android.JavaCameraView
- android:layout_width="fill_parent"
- android:layout_height="fill_parent"
- android:id="@+id/puzzle_activity_surface_view" />
-
-</LinearLayout>
+++ /dev/null
-<menu xmlns:android="http://schemas.android.com/apk/res/android">
- <item android:id="@+id/menu_start_new_game"
- android:title="@string/menu_start_new_game"
- android:orderInCategory="100" />
- <item android:id="@+id/menu_toggle_tile_numbers" android:title="@string/menu_toggle_tile_numbers"></item>
-</menu>
import org.opencv.core.Mat;
import org.opencv.android.CameraBridgeViewBase;
import org.opencv.android.CameraBridgeViewBase.CvCameraViewListener;
+import org.opencv.android.JavaCameraView;
import android.os.Bundle;
import android.app.Activity;
private CameraBridgeViewBase mOpenCvCameraView;
private Puzzle15Processor mPuzzle15;
+ private MenuItem mItemHideNumbers;
+ private MenuItem mItemStartNewGame;
+
private int mGameWidth;
private int mGameHeight;
super.onCreate(savedInstanceState);
getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
- setContentView(R.layout.activity_puzzle15);
-
- mOpenCvCameraView = (CameraBridgeViewBase) findViewById(R.id.puzzle_activity_surface_view);
+ Log.d(TAG, "Creating and seting view");
+ mOpenCvCameraView = (CameraBridgeViewBase) new JavaCameraView(this, -1);
+ setContentView(mOpenCvCameraView);
mOpenCvCameraView.setCvCameraViewListener(this);
mPuzzle15 = new Puzzle15Processor();
mPuzzle15.prepareNewGame();
@Override
public boolean onCreateOptionsMenu(Menu menu) {
- getMenuInflater().inflate(R.menu.activity_puzzle15, menu);
+ Log.i(TAG, "called onCreateOptionsMenu");
+ mItemHideNumbers = menu.add("Show/hide tile numbers");
+ mItemStartNewGame = menu.add("Start new game");
return true;
}
@Override
public boolean onOptionsItemSelected(MenuItem item) {
Log.i(TAG, "Menu Item selected " + item);
- if (item.getItemId() == R.id.menu_start_new_game) {
+ if (item == mItemStartNewGame) {
/* We need to start new game */
mPuzzle15.prepareNewGame();
- } else if (item.getItemId() == R.id.menu_toggle_tile_numbers) {
+ } else if (item == mItemHideNumbers) {
/* We need to enable or disable drawing of the tile numbers */
mPuzzle15.toggleTileNumbers();
}
LOCAL_MODULE := native_activity
LOCAL_SRC_FILES := native.cpp
-LOCAL_LDLIBS := -lm -llog -landroid
+LOCAL_LDLIBS += -lm -llog -landroid
LOCAL_STATIC_LIBRARIES := android_native_app_glue
include $(BUILD_SHARED_LIBRARY)
APP_ABI := armeabi-v7a
+APP_STL := gnustl_static
+APP_CPPFLAGS := -frtti -fexceptions
APP_PLATFORM := android-9
import android.app.Activity;
import android.content.Intent;
-import android.os.Bundle;
import android.util.Log;
public class CvNativeActivity extends Activity {
-<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
+<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools"
xmlns:opencv="http://schemas.android.com/apk/res-auto"
android:layout_width="match_parent"
opencv:show_fps="true"
opencv:camera_id="any" />
-</LinearLayout>
+</FrameLayout>
{
// print a welcome message, and the OpenCV version
cout << "\nThis is a demo of Lukas-Kanade optical flow lkdemo(),\n"
- "Using OpenCV version %s\n" << CV_VERSION << "\n"
- << endl;
-
+ "Using OpenCV version " << CV_VERSION << endl;
+ cout << "\nIt uses camera by default, but you can provide a path to video as an argument.\n";
cout << "\nHot keys: \n"
"\tESC - quit the program\n"
"\tr - auto-initialize tracking\n"
{
if( event == CV_EVENT_LBUTTONDOWN )
{
- point = Point2f((float)x,(float)y);
+ point = Point2f((float)x, (float)y);
addRemovePt = true;
}
}
int main( int argc, char** argv )
{
+ help();
+
VideoCapture cap;
- TermCriteria termcrit(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS,20,0.03);
+ TermCriteria termcrit(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 20, 0.03);
Size subPixWinSize(10,10), winSize(31,31);
const int MAX_COUNT = 500;
return 0;
}
- help();
-
namedWindow( "LK Demo", 1 );
setMouseCallback( "LK Demo", onMouse, 0 );
needToInit = true;
break;
case 'c':
+ points[0].clear();
points[1].clear();
break;
case 'n':
nightMode = !nightMode;
break;
- default:
- ;
}
std::swap(points[1], points[0]);
- swap(prevGray, gray);
+ cv::swap(prevGray, gray);
}
return 0;
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Utility for measuring python opencv API coverage by samples.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Scans current directory for *.py files and reports
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Affine invariant feature-based image matching sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
browse.py
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
import cv2
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Camshift tracker
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Coherence-enhancing filtering example
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
import cv2
-#/usr/bin/env python
+#!/usr/bin/env python
'''
This module contais some common routines used by other samples.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
This program illustrates the use of findContours and drawContours.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Wiener deconvolution.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Sample-launcher application.
-#/usr/bin/env python
+#!/usr/bin/env python
import cv2
import numpy as np
-#/usr/bin/env python
+#!/usr/bin/env python
'''
SVM and KNearest digit recognition.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Digit recognition adjustment.
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
import cv2
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Distance transform sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
This sample demonstrates Canny edge detection.
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
import cv2
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Feature homography
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Feature-based image matching sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Robust line fitting.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Floodfill sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
gabor_threads.py
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
from numpy import random
-#/usr/bin/env python
+#!/usr/bin/env python
''' This is a sample for histogram plotting for RGB images and grayscale images for better understanding of colour distribution
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Inpainting sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
K-means clusterization sample.
-#/usr/bin/env python
+#!/usr/bin/env python
''' An example of Laplacian Pyramid construction and merging.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
The sample demonstrates how to train Random Trees classifier
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Lucas-Kanade homography tracker
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Lucas-Kanade tracker
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Morphology operations.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
MOSSE tracking sample
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
import cv2
-#/usr/bin/env python
+#!/usr/bin/env python
#!/usr/bin/env python
'''
-#/usr/bin/env python
+#!/usr/bin/env python
'''
MSER detector demo
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
import cv2
-#/usr/bin/env python
+#!/usr/bin/env python
import numpy as np
import cv2
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Planar augmented reality
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Multitarget planar tracking
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Simple "Square Detector" program.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Simple example of stereo image matching and point cloud generation.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Texture flow direction estimation.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Multiscale Turing Patterns generator
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Video capture sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Data matrix detector sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Multithreaded video processing sample.
-#/usr/bin/env python
+#!/usr/bin/env python
'''
Watershed segmentation