From 278fb617d21ca5af6a0b41a25909587d999c2cdc Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Tue, 22 Oct 2013 14:04:49 +0400 Subject: [PATCH] the first draft of transparent API and new UMat class. --- modules/core/include/opencv2/core/ocl.hpp | 462 +++++ modules/core/src/ocl.cpp | 2901 +++++++++++++++++++++++++++++ modules/core/src/umatrix.cpp | 643 +++++++ 3 files changed, 4006 insertions(+) create mode 100644 modules/core/include/opencv2/core/ocl.hpp create mode 100644 modules/core/src/ocl.cpp create mode 100644 modules/core/src/umatrix.cpp diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp new file mode 100644 index 0000000..0f62c08 --- /dev/null +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -0,0 +1,462 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef __OPENCV_OPENCL_HPP__ +#define __OPENCV_OPENCL_HPP__ + +#include "opencv2/core.hpp" + +namespace cv { namespace ocl { + +CV_EXPORTS bool haveOpenCL(); +CV_EXPORTS bool useOpenCL(); +CV_EXPORTS void setUseOpenCL(bool flag); +CV_EXPORTS void finish(); + +class CV_EXPORTS Context; +class CV_EXPORTS Device; +class CV_EXPORTS Kernel; +class CV_EXPORTS Program; +class CV_EXPORTS ProgramSource; +class CV_EXPORTS Queue; + +class CV_EXPORTS Device +{ +public: + Device(); + explicit Device(void* d); + Device(const Device& d); + Device& operator = (const Device& d); + ~Device(); + + void set(void* d); + + enum + { + TYPE_DEFAULT = (1 << 0), + TYPE_CPU = (1 << 1), + TYPE_GPU = (1 << 2), + TYPE_ACCELERATOR = (1 << 3), + TYPE_DGPU = TYPE_GPU + (1 << 16), + TYPE_IGPU = TYPE_GPU + (1 << 17), + TYPE_ALL = 0xFFFFFFFF + }; + + String name() const; + String extensions() const; + String vendor() const; + String OpenCL_C_Version() const; + String OpenCLVersion() const; + String driverVersion() const; + void* ptr() const; + + int type() const; + + int addressBits() const; + bool available() const; + bool compilerAvailable() const; + bool linkerAvailable() const; + + enum + { + FP_DENORM=(1 << 0), + FP_INF_NAN=(1 << 1), + FP_ROUND_TO_NEAREST=(1 << 2), + FP_ROUND_TO_ZERO=(1 << 3), + FP_ROUND_TO_INF=(1 << 4), + FP_FMA=(1 << 5), + FP_SOFT_FLOAT=(1 << 6), + FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7) + }; + int doubleFPConfig() const; + int singleFPConfig() const; + int halfFPConfig() const; + + bool endianLittle() const; + bool errorCorrectionSupport() const; + + enum + { + EXEC_KERNEL=(1 << 0), + EXEC_NATIVE_KERNEL=(1 << 1) + }; + int executionCapabilities() const; + + size_t globalMemCacheSize() const; + + enum + { + NO_CACHE=0, + READ_ONLY_CACHE=1, + READ_WRITE_CACHE=2 + }; + int globalMemCacheType() const; + int globalMemCacheLineSize() const; + size_t globalMemSize() const; + + size_t localMemSize() const; + enum + { + NO_LOCAL_MEM=0, + LOCAL_IS_LOCAL=1, + LOCAL_IS_GLOBAL=2 + }; + int localMemType() const; + bool hostUnifiedMemory() const; + + bool imageSupport() const; + + size_t image2DMaxWidth() const; + size_t image2DMaxHeight() const; + + size_t image3DMaxWidth() const; + size_t image3DMaxHeight() const; + size_t image3DMaxDepth() const; + + size_t imageMaxBufferSize() const; + size_t imageMaxArraySize() const; + + int maxClockFrequency() const; + int maxComputeUnits() const; + int maxConstantArgs() const; + size_t maxConstantBufferSize() const; + + size_t maxMemAllocSize() const; + size_t maxParameterSize() const; + + int maxReadImageArgs() const; + int maxWriteImageArgs() const; + int maxSamplers() const; + + size_t maxWorkGroupSize() const; + int maxWorkItemDims() const; + void maxWorkItemSizes(size_t*) const; + + int memBaseAddrAlign() const; + + int nativeVectorWidthChar() const; + int nativeVectorWidthShort() const; + int nativeVectorWidthInt() const; + int nativeVectorWidthLong() const; + int nativeVectorWidthFloat() const; + int nativeVectorWidthDouble() const; + int nativeVectorWidthHalf() const; + + int preferredVectorWidthChar() const; + int preferredVectorWidthShort() const; + int preferredVectorWidthInt() const; + int preferredVectorWidthLong() const; + int preferredVectorWidthFloat() const; + int preferredVectorWidthDouble() const; + int preferredVectorWidthHalf() const; + + size_t printfBufferSize() const; + size_t profilingTimerResolution() const; + + static const Device& getDefault(); + +protected: + struct Impl; + Impl* p; +}; + + +class CV_EXPORTS Context +{ +public: 
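+    // A minimal usage sketch, assuming an OpenCL runtime and at least one
+    // OpenCL-capable device are present (the GPU type below is only an example):
+    //
+    //     if( cv::ocl::haveOpenCL() && cv::ocl::useOpenCL() )
+    //     {
+    //         cv::ocl::Context ctx(cv::ocl::Device::TYPE_GPU);
+    //         if( ctx.ndevices() > 0 )
+    //             printf("OpenCL device: %s\n", ctx.device(0).name().c_str());
+    //     }
+    //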
+    Context();
+    explicit Context(int dtype);
+    ~Context();
+    Context(const Context& c);
+    Context& operator = (const Context& c);
+
+    bool create(int dtype);
+    size_t ndevices() const;
+    const Device& device(size_t idx) const;
+    int dtype() const;
+    Program getProg(const ProgramSource& prog,
+                    const String& buildopt, String& errmsg);
+
+    static Context& getDefault();
+    void* ptr() const;
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+class CV_EXPORTS Queue
+{
+public:
+    Queue();
+    explicit Queue(const Context& c, const Device& d=Device());
+    ~Queue();
+    Queue(const Queue& q);
+    Queue& operator = (const Queue& q);
+
+    bool create(const Context& c=Context(), const Device& d=Device());
+    void finish();
+    void* ptr() const;
+    static Queue& getDefault();
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+class CV_EXPORTS KernelArg
+{
+public:
+    enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8 };
+    KernelArg(int _flags, UMat* _m, void* _obj=0, size_t _sz=0);
+
+    static KernelArg Local() { return KernelArg(LOCAL, 0); }
+    static KernelArg ReadOnly(const UMat& m) { return KernelArg(READ_ONLY, (UMat*)&m); }
+    static KernelArg WriteOnly(const UMat& m) { return KernelArg(WRITE_ONLY, (UMat*)&m); }
+    static KernelArg Constant(const Mat& m);
+    template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
+    { return KernelArg(CONSTANT, 0, (void*)arr, n); }
+
+    int flags;
+    UMat* m;
+    void* obj;
+    size_t sz;
+};
+
+class CV_EXPORTS Kernel
+{
+public:
+    class CV_EXPORTS Callback
+    {
+    public:
+        virtual ~Callback() {}
+        virtual void operator()() = 0;
+    };
+
+    Kernel();
+    Kernel(const char* kname, const Program& prog);
+    Kernel(const char* kname, const ProgramSource& prog,
+           const String& buildopts, String& errmsg);
+    ~Kernel();
+    Kernel(const Kernel& k);
+    Kernel& operator = (const Kernel& k);
+
+    bool create(const char* kname, const Program& prog);
+    bool create(const char* kname, const ProgramSource& prog,
+                const String& buildopts, String& errmsg);
+
+    int set(int i, const void* value, size_t sz);
+    int set(int i, const UMat& m);
+    int set(int i, const KernelArg& arg);
+    template<typename _Tp> int set(int i, const _Tp& value)
+    { return set(i, &value, sizeof(value)); }
+
+    template<typename _Tp1>
+    Kernel& args(_Tp1 a1)
+    {
+        set(0, a1); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2>
+    Kernel& args(_Tp1 a1, _Tp2 a2)
+    {
+        int i = set(0, a1); set(i, a2); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3)
+    {
+        int i = set(0, a1); i = set(i, a2); set(i, a3); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); set(i, a4);
+        return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); set(i, a5);
+        return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); set(i, a6); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7, _Tp8 a8)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8);
+        return *this;
+    }
+
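+    // A minimal sketch of the intended argument-binding flow; the kernel name
+    // "myadd", the program source "src" and the UMats a, b, c are hypothetical,
+    // not part of this patch:
+    //
+    //     String errmsg;
+    //     ocl::Kernel k("myadd", src, "", errmsg);
+    //     k.args(KernelArg::ReadOnly(a), KernelArg::ReadOnly(b),
+    //            KernelArg::WriteOnly(c));
+    //
+    // Each set() call returns the index of the next free argument slot, which is
+    // what lets the args() overloads above and below chain their set() calls.
+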
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
+             typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7, _Tp8 a8, _Tp9 a9)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
+        set(i, a9); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
+             typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
+                 _Tp8 a8, _Tp9 a9, _Tp10 a10)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
+        i = set(i, a9); set(i, a10); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
+             typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10,
+             typename _Tp11>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
+                 _Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
+        i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
+    }
+
+    template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
+             typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10,
+             typename _Tp11, typename _Tp12>
+    Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
+                 _Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11, _Tp12 a12)
+    {
+        int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
+        i = set(i, a9); i = set(i, a10); i = set(i, a11); set(i, a12);
+        return *this;
+    }
+
+    void run(int dims, size_t offset[],
+             size_t globalsize[], size_t localsize[], bool sync,
+             const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
+             const Queue& q=Queue());
+    void runTask(bool sync,
+                 const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
+                 const Queue& q=Queue());
+
+    size_t workGroupSize() const;
+    bool compileWorkGroupSize(size_t wsz[]) const;
+    size_t localMemSize() const;
+
+    void* ptr() const;
+    struct Impl;
+
+protected:
+    Impl* p;
+};
+
+class CV_EXPORTS Program
+{
+public:
+    Program();
+    Program(const ProgramSource& src,
+            const String& buildflags, String& errmsg);
+    explicit Program(const String& buf);
+    Program(const Program& prog);
+
+    Program& operator = (const Program& prog);
+    ~Program();
+
+    bool create(const ProgramSource& src,
+                const String& buildflags, String& errmsg);
+    bool read(const String& buf, const String& buildflags);
+    bool write(String& buf) const;
+
+    const ProgramSource& source() const;
+    void* ptr() const;
+
+    String getPrefix() const;
+    static String getPrefix(const String& buildflags);
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+class CV_EXPORTS ProgramSource
+{
+public:
+    typedef uint64 hash_t;
+
+    ProgramSource();
+    explicit ProgramSource(const String& prog);
+    explicit ProgramSource(const char* prog);
+    ~ProgramSource();
+    ProgramSource(const ProgramSource& prog);
+    ProgramSource& operator = (const ProgramSource& prog);
+
+    const String& source() const;
+    hash_t hash() const;
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+}}
+
+#endif
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
new file mode 100644
index 0000000..fdcedfa
--- /dev/null
+++ b/modules/core/src/ocl.cpp
@@ -0,0 +1,2901 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <map>
+
+/*
+  Part of the file is an extract from the standard OpenCL headers from Khronos site.
+  Below is the original copyright.
+*/
+
+/*******************************************************************************
+ * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#if 0 //defined __APPLE__
+#define HAVE_OPENCL 1
+#else
+#undef HAVE_OPENCL
+#endif
+
+#define OPENCV_CL_NOT_IMPLEMENTED -1000
+
+#ifdef HAVE_OPENCL
+
+#if defined __APPLE__
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+static const bool g_haveOpenCL = true;
+
+#else
+
+extern "C" {
+
+struct _cl_platform_id { int dummy; };
+struct _cl_device_id { int dummy; };
+struct _cl_context { int dummy; };
+struct _cl_command_queue { int dummy; };
+struct _cl_mem { int dummy; };
+struct _cl_program { int dummy; };
+struct _cl_kernel { int dummy; };
+struct _cl_event { int dummy; };
+struct _cl_sampler { int dummy; };
+
+typedef struct _cl_platform_id *  cl_platform_id;
+typedef struct _cl_device_id *    cl_device_id;
+typedef struct _cl_context *      cl_context;
+typedef struct _cl_command_queue * cl_command_queue;
+typedef struct _cl_mem *          cl_mem;
+typedef struct _cl_program *      cl_program;
+typedef struct _cl_kernel *       cl_kernel;
+typedef struct _cl_event *        cl_event;
+typedef struct _cl_sampler *      cl_sampler;
+
+typedef int cl_int;
+typedef unsigned cl_uint;
+typedef long cl_long;
+typedef unsigned long cl_ulong;
+
+typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+typedef cl_ulong cl_bitfield;
+typedef cl_bitfield cl_device_type;
+typedef cl_uint cl_platform_info;
+typedef cl_uint cl_device_info;
+typedef cl_bitfield cl_device_fp_config;
+typedef cl_uint cl_device_mem_cache_type;
+typedef cl_uint cl_device_local_mem_type;
+typedef cl_bitfield cl_device_exec_capabilities;
+typedef cl_bitfield cl_command_queue_properties;
+typedef intptr_t cl_device_partition_property;
+typedef cl_bitfield cl_device_affinity_domain;
+
+typedef intptr_t cl_context_properties;
+typedef cl_uint cl_context_info;
+typedef cl_uint cl_command_queue_info;
+typedef cl_uint cl_channel_order;
+typedef cl_uint cl_channel_type;
+typedef cl_bitfield cl_mem_flags;
+typedef cl_uint cl_mem_object_type;
+typedef cl_uint cl_mem_info;
+typedef cl_bitfield cl_mem_migration_flags;
+typedef cl_uint cl_image_info;
+typedef cl_uint cl_buffer_create_type;
+typedef cl_uint cl_addressing_mode;
+typedef cl_uint cl_filter_mode;
+typedef cl_uint cl_sampler_info;
+typedef cl_bitfield cl_map_flags;
+typedef cl_uint cl_program_info;
+typedef cl_uint cl_program_build_info;
+typedef cl_uint cl_program_binary_type;
+typedef cl_int cl_build_status;
+typedef cl_uint cl_kernel_info;
+typedef cl_uint cl_kernel_arg_info;
+typedef cl_uint cl_kernel_arg_address_qualifier;
+typedef cl_uint cl_kernel_arg_access_qualifier;
+typedef cl_bitfield cl_kernel_arg_type_qualifier;
+typedef cl_uint cl_kernel_work_group_info;
+typedef cl_uint cl_event_info;
+typedef cl_uint cl_command_type;
+typedef cl_uint cl_profiling_info;
+
+
+typedef struct _cl_image_format {
+    cl_channel_order image_channel_order;
+    cl_channel_type image_channel_data_type;
+} cl_image_format;
+
+typedef struct _cl_image_desc {
+    cl_mem_object_type image_type;
+    size_t image_width;
+    size_t image_height;
+    size_t image_depth;
+    size_t image_array_size;
+    size_t image_row_pitch;
+    size_t image_slice_pitch;
+    cl_uint num_mip_levels;
+    cl_uint num_samples;
+    cl_mem buffer;
+} cl_image_desc;
+
+typedef struct _cl_buffer_region {
+    size_t origin;
+    size_t size;
+} cl_buffer_region;
+
+
+//////////////////////////////////////////////////////////
+
+#define CL_SUCCESS 0
+#define CL_DEVICE_NOT_FOUND -1
+#define CL_DEVICE_NOT_AVAILABLE -2
+#define 
CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#define CL_INVALID_PROPERTY -64 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 + +/*#define CL_VERSION_1_0 1 +#define CL_VERSION_1_1 1 +#define CL_VERSION_1_2 1*/ + +#define CL_FALSE 0 +#define CL_TRUE 1 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE + +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define 
CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B + +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#define CL_FP_SOFT_FLOAT (1 << 6) +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) + +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) + +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 + +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define 
CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +// reserved (1 << 6) +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE + +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#define CL_UNORM_INT24 0x10DF + +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 + +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 + +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A + +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 + +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 + +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define 
CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 + +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#define CL_EVENT_CONTEXT 0x11D4 + +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 + +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 + +#define CL_CALLBACK CV_STDCALL + +static 
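volatile bool g_haveOpenCL = false;
+
+/* When the library is built without HAVE_OPENCL, the OpenCL runtime is bound at
+   run time instead: initOpenCLAndLoad() below loads the vendor runtime once, and
+   each API entry point is resolved on first use through the OCL_FUNC/OCL_FUNC_P
+   macros defined after it. Roughly, a declaration such as
+
+       OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
+
+   expands to a static clFinish() wrapper that looks up the symbol once, caches
+   the function pointer, and returns OPENCV_CL_NOT_IMPLEMENTED when the runtime
+   cannot be loaded. */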
+static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
+
+#if defined(__APPLE__)
+#include <dlfcn.h>
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+    static bool initialized = false;
+    static void* handle = 0;
+    if (!handle)
+    {
+        if(!initialized)
+        {
+            handle = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY);
+            initialized = true;
+            g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
+        }
+        if(!handle)
+            return 0;
+    }
+
+    return funcname ? dlsym(handle, funcname) : 0;
+}
+
+#elif defined WIN32 || defined _WIN32
+
+#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
+  #define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
+#endif
+#include <windows.h>
+#if (_WIN32_WINNT >= 0x0602)
+  #include <synchapi.h>
+#endif
+#undef small
+#undef min
+#undef max
+#undef abs
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+    static bool initialized = false;
+    static HMODULE handle = 0;
+    if (!handle)
+    {
+        if(!initialized)
+        {
+            handle = LoadLibraryA("OpenCL.dll");
+            initialized = true;
+            g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
+        }
+        if(!handle)
+            return 0;
+    }
+
+    return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
+}
+
+#elif defined(__linux)
+
+#include <dlfcn.h>
+#include <stdio.h>
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+    static bool initialized = false;
+    static void* handle = 0;
+    if (!handle)
+    {
+        if(!initialized)
+        {
+            handle = dlopen("libOpenCL.so", RTLD_LAZY);
+            if(!handle)
+                handle = dlopen("libCL.so", RTLD_LAZY);
+            initialized = true;
+            g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
+        }
+        if(!handle)
+            return 0;
+    }
+
+    return funcname ? 
(void*)dlsym(handle, funcname) : 0; +} + +#else + +static void* initOpenCLAndLoad(const char*) +{ + return 0; +} + +#endif + + +#define OCL_FUNC(rettype, funcname, argsdecl, args) \ + typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \ + static rettype funcname argsdecl \ + { \ + static funcname##_t funcname##_p = 0; \ + if( !funcname##_p ) \ + { \ + funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \ + if( !funcname##_p ) \ + return OPENCV_CL_NOT_IMPLEMENTED; \ + } \ + return funcname##_p args; \ + } + + +#define OCL_FUNC_P(rettype, funcname, argsdecl, args) \ + typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \ + static rettype funcname argsdecl \ + { \ + static funcname##_t funcname##_p = 0; \ + if( !funcname##_p ) \ + { \ + funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \ + if( !funcname##_p ) \ + { \ + if( errcode_ret ) \ + *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \ + return 0; \ + } \ + } \ + return funcname##_p args; \ + } + +OCL_FUNC(cl_int, clGetPlatformIDs, + (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms), + (num_entries, platforms, num_platforms)) + +OCL_FUNC(cl_int, clGetPlatformInfo, + (cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void * param_value, + size_t * param_value_size_ret), + (platform, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clGetDeviceInfo, + (cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (device, param_name, param_value_size, param_value, param_value_size_ret)) + + +OCL_FUNC(cl_int, clGetDeviceIDs, + (cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices), + (platform, device_type, num_entries, devices, num_devices)) + +OCL_FUNC_P(cl_context, clCreateContext, + (const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *), + void * user_data, + cl_int * errcode_ret), + (properties, num_devices, devices, pfn_notify, user_data, errcode_ret)) + +OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context)) + +/* +OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context)) + +OCL_FUNC_P(cl_context, clCreateContextFromType, + (const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *), + void * user_data, + cl_int * errcode_ret), + (properties, device_type, pfn_notify, user_data, errcode_ret)) + +OCL_FUNC(cl_int, clGetContextInfo, + (cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (context, param_name, param_value_size, + param_value, param_value_size_ret)) +*/ +OCL_FUNC_P(cl_command_queue, clCreateCommandQueue, + (cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret), + (context, device, properties, errcode_ret)) + +OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue)) + +OCL_FUNC_P(cl_mem, clCreateBuffer, + (cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret), + (context, flags, size, host_ptr, errcode_ret)) + +/* +OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue)) + +OCL_FUNC(cl_int, 
clGetCommandQueueInfo, + (cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (command_queue, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC_P(cl_mem, clCreateSubBuffer, + (cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret), + (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret)) + +OCL_FUNC_P(cl_mem, clCreateImage, + (cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret), + (context, flags, image_format, image_desc, host_ptr, errcode_ret)) + +OCL_FUNC(cl_int, clGetSupportedImageFormats, + (cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats), + (context, flags, image_type, num_entries, image_formats, num_image_formats)) + +OCL_FUNC(cl_int, clGetMemObjectInfo, + (cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (memobj, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clGetImageInfo, + (cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (image, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clCreateKernelsInProgram, + (cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret), + (program, num_kernels, kernels, num_kernels_ret)) + +OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel)) + +OCL_FUNC(cl_int, clGetKernelArgInfo, + (cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clEnqueueReadImage, + (cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin[3], + const size_t * region[3], + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, image, blocking_read, origin, region, + row_pitch, slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event)) + +OCL_FUNC(cl_int, clEnqueueWriteImage, + (cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin[3], + const size_t * region[3], + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, image, blocking_write, origin, region, input_row_pitch, + input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueFillImage, + (cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * origin[3], + const size_t * region[3], + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, image, fill_color, origin, region, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyImage, + (cl_command_queue command_queue, + cl_mem src_image, + cl_mem 
dst_image, + const size_t * src_origin[3], + const size_t * dst_origin[3], + const size_t * region[3], + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_image, dst_image, src_origin, dst_origin, + region, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer, + (cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin[3], + const size_t * region[3], + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_image, dst_buffer, src_origin, region, dst_offset, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyBufferToImage, + (cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin[3], + const size_t * region[3], + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_buffer, dst_image, src_offset, dst_origin, + region, num_events_in_wait_list, event_wait_list, event)) + + +OCL_FUNC_P(void*, clEnqueueMapImage, + (cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin[3], + const size_t * region[3], + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret), + (command_queue, image, blocking_map, map_flags, origin, region, + image_row_pitch, image_slice_pitch, num_events_in_wait_list, + event_wait_list, event, errcode_ret)) + +OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program)) + +OCL_FUNC(cl_int, clGetKernelInfo, + (cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (kernel, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj)) + +*/ + +OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj)) + + +OCL_FUNC_P(cl_program, clCreateProgramWithSource, + (cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret), + (context, count, strings, lengths, errcode_ret)) + +OCL_FUNC_P(cl_program, clCreateProgramWithBinary, + (cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret), + (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret)) + +OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program)) + +OCL_FUNC(cl_int, clBuildProgram, + (cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program, void *), + void * user_data), + (program, num_devices, device_list, options, pfn_notify, user_data)) + +OCL_FUNC(cl_int, clGetProgramInfo, + (cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (program, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clGetProgramBuildInfo, + (cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (program, 
device, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC_P(cl_kernel, clCreateKernel, + (cl_program program, + const char * kernel_name, + cl_int * errcode_ret), + (program, kernel_name, errcode_ret)) + +OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel)) + +OCL_FUNC(cl_int, clSetKernelArg, + (cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value), + (kernel, arg_index, arg_size, arg_value)) + +OCL_FUNC(cl_int, clGetKernelWorkGroupInfo, + (cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (kernel, device, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue)) + +OCL_FUNC(cl_int, clEnqueueReadBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_read, offset, size, ptr, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueReadBufferRect, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch, + buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list, + event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueWriteBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_write, offset, size, ptr, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueWriteBufferRect, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_write, buffer_offset, host_offset, + region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, + host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueFillBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, pattern, pattern_size, offset, size, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyBuffer, + (cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_buffer, dst_buffer, 
src_offset, dst_offset, + size, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyBufferRect, + (cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_buffer, dst_buffer, src_origin, dst_origin, + region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC_P(void*, clEnqueueMapBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret), + (command_queue, buffer, blocking_map, map_flags, offset, size, + num_events_in_wait_list, event_wait_list, event, errcode_ret)) + +OCL_FUNC(cl_int, clEnqueueUnmapMemObject, + (cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueNDRangeKernel, + (cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, kernel, work_dim, global_work_offset, global_work_size, + local_work_size, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueTask, + (cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, kernel, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clSetEventCallback, + (cl_event event, + cl_int command_exec_callback_type , + void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data), + void *user_data), + (event, command_exec_callback_type, pfn_event_notify, user_data)) + +OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event)) + +} + +#endif + +namespace cv { namespace ocl { + +// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182 +static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 ) +{ + static uint64 table[256]; + static bool initialized = false; + + if( !initialized ) + { + for( int i = 0; i < 256; i++ ) + { + uint64 c = i; + for( int j = 0; j < 8; j++ ) + c = ((c & 1) ? 
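CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
+            /* 0xc96c5795d7870f42 is the bit-reflected form of the ECMA-182
+               polynomial; the table lets the outer loop consume one input byte
+               per step. In this file the sum is used only as a hash, e.g. for
+               the program cache key built in Context::Impl::getProg():
+                   crc64((const uchar*)prefix.c_str(), prefix.size()) */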
+            table[i] = c;
+        }
+        initialized = true;
+    }
+
+    uint64 crc = ~crc0;
+    for( size_t idx = 0; idx < size; idx++ )
+        crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
+
+    return ~crc;
+}
+
+struct HashKey
+{
+    typedef uint64 part;
+    HashKey(part _a, part _b) : a(_a), b(_b) {}
+    part a, b;
+};
+
+inline bool operator == (const HashKey& h1, const HashKey& h2)
+{
+    return h1.a == h2.a && h1.b == h2.b;
+}
+
+inline bool operator < (const HashKey& h1, const HashKey& h2)
+{
+    return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
+}
+
+bool haveOpenCL()
+{
+    initOpenCLAndLoad(0);
+    return g_haveOpenCL;
+}
+
+bool useOpenCL()
+{
+    TLSData* data = TLSData::get();
+    if( data->useOpenCL < 0 )
+        data->useOpenCL = (int)haveOpenCL();
+    return data->useOpenCL > 0;
+}
+
+void finish()
+{
+    Queue::getDefault().finish();
+}
+
+#define IMPLEMENT_REFCOUNTABLE() \
+    void addref() { CV_XADD(&refcount, 1); } \
+    void release() { if( CV_XADD(&refcount, -1) == 1 ) delete this; } \
+    int refcount
+
+class Platform
+{
+public:
+    Platform();
+    ~Platform();
+    Platform(const Platform& p);
+    Platform& operator = (const Platform& p);
+
+    void* ptr() const;
+    static Platform& getDefault();
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+struct Platform::Impl
+{
+    Impl()
+    {
+        refcount = 1;
+        handle = 0;
+        initialized = false;
+    }
+
+    ~Impl() {}
+
+    void init()
+    {
+        if( !initialized )
+        {
+            //cl_uint num_entries
+            cl_uint n = 0;
+            if( clGetPlatformIDs(1, &handle, &n) < 0 || n == 0 )
+                handle = 0;
+            if( handle != 0 )
+            {
+                char buf[1000];
+                size_t len = 0;
+                // query at most sizeof(buf)-1 bytes so the '\0' below stays in bounds
+                clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf)-1, buf, &len);
+                buf[len] = '\0';
+                vendor = String(buf);
+            }
+
+            initialized = true;
+        }
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+
+    cl_platform_id handle;
+    String vendor;
+    bool initialized;
+};
+
+Platform::Platform()
+{
+    p = 0;
+}
+
+Platform::~Platform()
+{
+    if(p)
+        p->release();
+}
+
+Platform::Platform(const Platform& pl)
+{
+    p = (Impl*)pl.p;
+    if(p)
+        p->addref();
+}
+
+Platform& Platform::operator = (const Platform& pl)
+{
+    Impl* newp = (Impl*)pl.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+void* Platform::ptr() const
+{
+    return p ? p->handle : 0;
+}
+
+Platform& Platform::getDefault()
+{
+    static Platform p;
+    if( !p.p )
+    {
+        p.p = new Impl;
+        p.p->init();
+    }
+    return p;
+}
+
+///////////////////////////////////////////////////////////////////////////////////
+
+struct Device::Impl
+{
+    Impl(void* d)
+    {
+        refcount = 1;
+        handle = (cl_device_id)d;
+    }
+
+    template<typename _TpCL, typename _TpOut>
+    _TpOut getProp(cl_device_info prop) const
+    {
+        _TpCL temp=_TpCL();
+        size_t sz = 0;
+
+        return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 &&
+            sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
+    }
+
+    String getStrProp(cl_device_info prop) const
+    {
+        char buf[1024];
+        size_t sz=0;
+        return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) >= 0 &&
+            sz < sizeof(buf) ? String(buf) : String();
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+    cl_device_id handle;
+};
+
+
+Device::Device()
+{
+    p = 0;
+}
+
+Device::Device(void* d)
+{
+    p = 0;
+    set(d);
+}
+
+Device::Device(const Device& d)
+{
+    p = d.p;
+    if(p)
+        p->addref();
+}
+
+Device& Device::operator = (const Device& d)
+{
+    Impl* newp = (Impl*)d.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+Device::~Device()
+{
+    if(p)
+        p->release();
+}
+
+void Device::set(void* d)
+{
+    if(p)
+        p->release();
+    p = new Impl(d);
+}
+
+void* Device::ptr() const
+{
+    return p ? p->handle : 0;
+}
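+
+// All the property getters below share one pattern: getProp<_TpCL, _TpOut>()
+// queries the value using its OpenCL type (_TpCL, e.g. cl_bool or cl_ulong)
+// and converts it to the public type (_TpOut), falling back to a
+// default-constructed value when the query fails or there is no device (p == 0),
+// e.g. p->getProp<cl_bool, bool>(CL_DEVICE_AVAILABLE).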
+
+String Device::name() const
+{ return p ? p->getStrProp(CL_DEVICE_NAME) : String(); }
+
+String Device::extensions() const
+{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
+
+String Device::vendor() const
+{ return p ? p->getStrProp(CL_DEVICE_VENDOR) : String(); }
+
+String Device::OpenCL_C_Version() const
+{ return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
+
+String Device::OpenCLVersion() const
+{ return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
+
+String Device::driverVersion() const
+{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
+
+int Device::type() const
+{ return p ? p->getProp<cl_device_type, int>(CL_DEVICE_TYPE) : 0; }
+
+int Device::addressBits() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
+
+bool Device::available() const
+{ return p ? p->getProp<cl_bool, bool>(CL_DEVICE_AVAILABLE) : 0; }
+
+bool Device::compilerAvailable() const
+{ return p ? p->getProp<cl_bool, bool>(CL_DEVICE_COMPILER_AVAILABLE) : 0; }
+
+bool Device::linkerAvailable() const
+{ return p ? p->getProp<cl_bool, bool>(CL_DEVICE_LINKER_AVAILABLE) : 0; }
+
+int Device::doubleFPConfig() const
+{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG) : 0; }
+
+int Device::singleFPConfig() const
+{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
+
+int Device::halfFPConfig() const
+{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
+
+bool Device::endianLittle() const
+{ return p ? p->getProp<cl_bool, bool>(CL_DEVICE_ENDIAN_LITTLE) : 0; }
+
+bool Device::errorCorrectionSupport() const
+{ return p ? p->getProp<cl_bool, bool>(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : 0; }
+
+int Device::executionCapabilities() const
+{ return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
+
+size_t Device::globalMemCacheSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
+
+int Device::globalMemCacheType() const
+{ return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
+
+int Device::globalMemCacheLineSize() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
+
+size_t Device::globalMemSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
+
+size_t Device::localMemSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
+
+int Device::localMemType() const
+{ return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
+
+bool Device::hostUnifiedMemory() const
+{ return p ? p->getProp<cl_bool, bool>(CL_DEVICE_HOST_UNIFIED_MEMORY) : 0; }
+
+bool Device::imageSupport() const
+{ return p ? p->getProp<cl_bool, bool>(CL_DEVICE_IMAGE_SUPPORT) : 0; }
+
+size_t Device::image2DMaxWidth() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
+
+size_t Device::image2DMaxHeight() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
+
+size_t Device::image3DMaxWidth() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
+
+size_t Device::image3DMaxHeight() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
+
+size_t Device::image3DMaxDepth() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
+
+size_t Device::imageMaxBufferSize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
+
+size_t Device::imageMaxArraySize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
+
+int Device::maxClockFrequency() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
+
+int Device::maxComputeUnits() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS) : 0; }
+
+int Device::maxConstantArgs() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
+size_t Device::maxConstantBufferSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
+
+size_t Device::maxMemAllocSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
+
+size_t Device::maxParameterSize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
+
+int Device::maxReadImageArgs() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
+
+int Device::maxWriteImageArgs() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
+
+int Device::maxSamplers() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
+
+size_t Device::maxWorkGroupSize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE) : 0; }
+
+int Device::maxWorkItemDims() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
+
+void Device::maxWorkItemSizes(size_t* sizes) const
+{
+    if(p)
+    {
+        const int MAX_DIMS = 32;
+        size_t retsz = 0;
+        clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                        MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz);
+    }
+}
+
+int Device::memBaseAddrAlign() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
+
+int Device::nativeVectorWidthChar() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
+
+int Device::nativeVectorWidthShort() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
+
+int Device::nativeVectorWidthInt() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
+
+int Device::nativeVectorWidthLong() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
+
+int Device::nativeVectorWidthFloat() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
+
+int Device::nativeVectorWidthDouble() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
+
+int Device::nativeVectorWidthHalf() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
+
+int Device::preferredVectorWidthChar() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
+
+int Device::preferredVectorWidthShort() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
+
+int Device::preferredVectorWidthInt() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
+
+int Device::preferredVectorWidthLong() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
+
+int Device::preferredVectorWidthFloat() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
+
+int Device::preferredVectorWidthDouble() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
+
+int Device::preferredVectorWidthHalf() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
+
+size_t Device::printfBufferSize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
+
+size_t Device::profilingTimerResolution() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
+const Device& Device::getDefault()
+{
+    const Context& ctx = Context::getDefault();
+    int idx = TLSData::get()->device;
+    return ctx.device(idx);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+struct Context::Impl
+{
+    Impl(int dtype0)
+    {
+        refcount = 1;
+        handle = 0;
+        dtype = dtype0;
+
+        cl_int retval = 0;
+        cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
+        cl_context_properties prop[] =
+        {
+            CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
+            0
+        };
+
+        cl_uint i, nd0 = 0, nd = 0;
+        int dtype = dtype0 & 15;
+        retval = clGetDeviceIDs( pl, dtype, 0, 0, &nd0 );
+        if(retval < 0)
+            return;
+        AutoBuffer<void*> dlistbuf(nd0*2+1);
+        cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
+        cl_device_id* dlist_new = dlist + nd0;
+        clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 );
+        String name0;
+
+        for(i = 0; i < nd0; i++)
+        {
+            Device d(dlist[i]);
+            if( !d.available() || !d.compilerAvailable() )
+                continue;
+            if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
+                continue;
+            if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
+                continue;
+            String name = d.name();
+            if( nd != 0 && name != name0 )
+                continue;
+            name0 = name;
+            dlist_new[nd++] = dlist[i];
+        }
+
+        if(nd == 0)
+            return;
+
+        // !!! in the current implementation force the number of devices to 1 !!!
+        nd = 1;
+
+        handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
+        bool ok = handle != 0 && retval >= 0;
+        if( ok )
+        {
+            devices.resize(nd);
+            for( i = 0; i < nd; i++ )
+                devices[i].set(dlist_new[i]);
+        }
+    }
+
+    ~Impl()
+    {
+        if(handle)
+            clReleaseContext(handle);
+        devices.clear();
+    }
+
+    Program getProg(const ProgramSource& src,
+                    const String& buildflags, String& errmsg)
+    {
+        String prefix = Program::getPrefix(buildflags);
+        HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
+        phash_t::iterator it = phash.find(k);
+        if( it != phash.end() )
+            return it->second;
+        Program prog(src, buildflags, errmsg);
+        phash.insert(std::pair<HashKey, Program>(k, prog));
+        return prog;
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+
+    cl_context handle;
+    std::vector<Device> devices;
+    int dtype;
+    bool initialized;
+
+    typedef ProgramSource::hash_t hash_t;
+
+    struct HashKey
+    {
+        HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
+        bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
+        bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
+        bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
+        hash_t a, b;
+    };
+    typedef std::map<HashKey, Program> phash_t;
+    phash_t phash;
+};
+
+
+Context::Context()
+{
+    p = 0;
+}
+
+Context::Context(int dtype)
+{
+    p = 0;
+    create(dtype);
+}
+
+int Context::dtype() const
+{
+    return p ? p->dtype : 0;
+}
+
+bool Context::create(int dtype0)
+{
+    if( !haveOpenCL() )
+        return false;
+    if(p)
+        p->release();
+    p = new Impl(dtype0);
+    if(!p->handle)
+    {
+        delete p;
+        p = 0;
+    }
+    return p != 0;
+}
+
+Context::~Context()
+{
+    if(p)
+        p->release();
+}
+
+Context::Context(const Context& c)
+{
+    p = (Impl*)c.p;
+    if(p)
+        p->addref();
+}
+
+Context& Context::operator = (const Context& c)
+{
+    Impl* newp = (Impl*)c.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+void* Context::ptr() const
+{
+    return p ? p->handle : 0;
+}
+
+size_t Context::ndevices() const
+{
+    return p ? p->devices.size() : 0;
+}
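As a usage sketch (not part of the patch): the default context is created lazily, and the current implementation exposes at most one device, so a hypothetical caller would enumerate it like this:

    #include "opencv2/core/ocl.hpp"
    #include <cstdio>

    static void listContextDevices()
    {
        cv::ocl::Context& ctx = cv::ocl::Context::getDefault();
        // ndevices() is forced to 1 by Context::Impl above
        for( size_t i = 0; i < ctx.ndevices(); i++ )
            printf("#%d: %s\n", (int)i, ctx.device(i).name().c_str());
    }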
+const Device& Context::device(size_t idx) const
+{
+    static Device dummy;
+    return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
+}
+
+Context& Context::getDefault()
+{
+    static Context ctx;
+    if( !ctx.p && haveOpenCL() )
+    {
+        // TODO: do not create a new context right away;
+        // first, try to retrieve an existing context of the same type.
+        // For now, try the device types from the most specific to the most generic.
+        ctx.create(Device::TYPE_ACCELERATOR);
+        if(!ctx.p)
+            ctx.create(Device::TYPE_DGPU);
+        if(!ctx.p)
+            ctx.create(Device::TYPE_IGPU);
+        if(!ctx.p)
+            ctx.create(Device::TYPE_CPU);
+    }
+
+    return ctx;
+}
+
+Program Context::getProg(const ProgramSource& prog,
+                         const String& buildopts, String& errmsg)
+{
+    return p ? p->getProg(prog, buildopts, errmsg) : Program();
+}
+
+struct Queue::Impl
+{
+    Impl(const Context& c, const Device& d)
+    {
+        refcount = 1;
+        const Context* pc = &c;
+        cl_context ch = (cl_context)pc->ptr();
+        if( !ch )
+        {
+            pc = &Context::getDefault();
+            ch = (cl_context)pc->ptr();
+        }
+        cl_device_id dh = (cl_device_id)d.ptr();
+        if( !dh )
+            dh = (cl_device_id)pc->device(0).ptr();
+        cl_int retval = 0;
+        handle = clCreateCommandQueue(ch, dh, 0, &retval);
+    }
+
+    ~Impl()
+    {
+        if(handle)
+        {
+            clFinish(handle);
+            clReleaseCommandQueue(handle);
+        }
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+
+    cl_command_queue handle;
+    bool initialized;
+};
+
+Queue::Queue()
+{
+    p = 0;
+}
+
+Queue::Queue(const Context& c, const Device& d)
+{
+    p = 0;
+    create(c, d);
+}
+
+Queue::Queue(const Queue& q)
+{
+    p = q.p;
+    if(p)
+        p->addref();
+}
+
+Queue& Queue::operator = (const Queue& q)
+{
+    Impl* newp = (Impl*)q.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+Queue::~Queue()
+{
+    if(p)
+        p->release();
+}
+
+bool Queue::create(const Context& c, const Device& d)
+{
+    if(p)
+        p->release();
+    p = new Impl(c, d);
+    return p->handle != 0;
+}
+
+void Queue::finish()
+{
+    if(p && p->handle)
+        clFinish(p->handle);
+}
+
+void* Queue::ptr() const
+{
+    return p ? p->handle : 0;
+}
+
+Queue& Queue::getDefault()
+{
+    Queue& q = TLSData::get()->oclQueue;
+    if( !q.p || !q.p->handle )
+        q.create(Context::getDefault());
+    return q;
+}
+
+static cl_command_queue getQueue(const Queue& q)
+{
+    cl_command_queue qq = (cl_command_queue)q.ptr();
+    if(!qq)
+        qq = (cl_command_queue)Queue::getDefault().ptr();
+    return qq;
+}
+
+KernelArg::KernelArg(int _flags, UMat* _m, void* _obj, size_t _sz)
+    : flags(_flags), m(_m), obj(_obj), sz(_sz)
+{
+}
+
+KernelArg KernelArg::Constant(const Mat& m)
+{
+    CV_Assert(m.isContinuous());
+    return KernelArg(CONSTANT, 0, m.data, m.total()*m.elemSize());
+}
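A sketch (not part of the patch) of how KernelArg is intended to wrap kernel parameters; it uses only the constructor defined above and the flags referenced by Kernel::set() below:

    using namespace cv;
    using namespace cv::ocl;

    static void bindArgs(Kernel& k, UMat& src, UMat& dst, float alpha)
    {
        int i = 0;
        // UMat arguments expand into buffer + offset + geometry (see Kernel::set)
        i = k.set(i, KernelArg(KernelArg::READ_ONLY, &src, 0, 0));
        i = k.set(i, KernelArg(KernelArg::WRITE_ONLY, &dst, 0, 0));
        i = k.set(i, &alpha, sizeof(alpha)); // plain POD argument
    }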
+struct Kernel::Impl
+{
+    Impl(const char* kname, const Program& prog)
+    {
+        e = 0; refcount = 1;
+        cl_program ph = (cl_program)prog.ptr();
+        cl_int retval = 0;
+        handle = ph != 0 ?
+            clCreateKernel(ph, kname, &retval) : 0;
+    }
+    void finit()
+    {
+        if(!f.empty()) f->operator()();
+        if(e) { clReleaseEvent(e); e = 0; }
+        release();
+    }
+
+    ~Impl()
+    {
+        if(handle)
+            clReleaseKernel(handle);
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+
+    cl_kernel handle;
+    cl_event e;
+    Ptr<Kernel::CleanCallback> f;
+};
+
+}}
+
+extern "C"
+{
+static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
+{
+    ((cv::ocl::Kernel::Impl*)p)->finit();
+}
+
+}
+
+namespace cv { namespace ocl {
+
+Kernel::Kernel()
+{
+    p = 0;
+}
+
+Kernel::Kernel(const char* kname, const Program& prog)
+{
+    p = 0;
+    create(kname, prog);
+}
+
+Kernel::Kernel(const char* kname, const ProgramSource& src,
+               const String& buildopts, String& errmsg)
+{
+    p = 0;
+    create(kname, src, buildopts, errmsg);
+}
+
+Kernel::Kernel(const Kernel& k)
+{
+    p = k.p;
+    if(p)
+        p->addref();
+}
+
+Kernel& Kernel::operator = (const Kernel& k)
+{
+    Impl* newp = (Impl*)k.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+Kernel::~Kernel()
+{
+    if(p)
+        p->release();
+}
+
+bool Kernel::create(const char* kname, const Program& prog)
+{
+    if(p)
+        p->release();
+    p = new Impl(kname, prog);
+    if(p->handle == 0)
+    {
+        p->release();
+        p = 0;
+    }
+    return p != 0;
+}
+
+bool Kernel::create(const char* kname, const ProgramSource& src,
+                    const String& buildopts, String& errmsg)
+{
+    if(p)
+    {
+        p->release();
+        p = 0;
+    }
+    const Program& prog = Context::getDefault().getProg(src, buildopts, errmsg);
+    return create(kname, prog);
+}
+
+void* Kernel::ptr() const
+{
+    return p ? p->handle : 0;
+}
+
+int Kernel::set(int i, const void* value, size_t sz)
+{
+    CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
+    return i+1;
+}
+
+int Kernel::set(int i, const UMat& m)
+{
+    return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
+}
+
+int Kernel::set(int i, const KernelArg& arg)
+{
+    CV_Assert( p && p->handle );
+    if( arg.m )
+    {
+        int dims = arg.m->dims;
+        void* h = arg.m->handle(((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
+                                ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0));
+        clSetKernelArg(p->handle, (cl_uint)i, sizeof(cl_mem), &h);
+        clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t), &arg.m->offset);
+        if( dims <= 2 )
+        {
+            clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t), &arg.m->step.p[0]);
+            clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(arg.m->rows), &arg.m->rows);
+            clSetKernelArg(p->handle, (cl_uint)(i+4), sizeof(arg.m->cols), &arg.m->cols);
+            return i + 5;
+        }
+        else
+        {
+            clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t)*(dims-1), &arg.m->step.p[0]);
+            clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(cl_int)*dims, &arg.m->size.p[0]);
+            return i + 4;
+        }
+    }
+    else
+    {
+        clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
+        return i+1;
+    }
+}
+
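Putting set() and run() together, a minimal host-side sketch (not part of the patch); the kernel name "negate" is illustrative only, and Kernel::CleanCallback is the callback type assumed from the header declaration:

    using namespace cv::ocl;

    static void runKernel(const ProgramSource& code, cv::UMat& src, cv::UMat& dst,
                          cv::String& errmsg)
    {
        // "negate" must be a kernel defined in `code` whose parameter list matches
        // the arguments bound below (buffer + offset + step + rows + cols, twice)
        Kernel k("negate", code, "", errmsg);
        if( !k.ptr() )
            return;
        int i = 0;
        i = k.set(i, src);
        i = k.set(i, dst);
        size_t gsize[] = { (size_t)src.cols, (size_t)src.rows };
        // synchronous launch with no offsets, auto local size, no cleanup callback
        k.run(2, 0, gsize, 0, true, cv::Ptr<Kernel::CleanCallback>(), Queue());
    }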
+void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
+                 bool sync, const Ptr<Kernel::CleanCallback>& cleanupCallback, const Queue& q)
+{
+    CV_Assert(p && p->handle && p->e == 0);
+    cl_command_queue qq = getQueue(q);
+    clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
+                           offset, globalsize, localsize, 0, 0,
+                           sync ? 0 : &p->e);
+    if( sync )
+    {
+        clFinish(qq);
+        if( !cleanupCallback.empty() )
+            cleanupCallback->operator()();
+    }
+    else
+    {
+        p->f = cleanupCallback;
+        p->addref();
+        clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
+    }
+}
+
+void Kernel::runTask(bool sync, const Ptr<Kernel::CleanCallback>& cleanupCallback, const Queue& q)
+{
+    CV_Assert(p && p->handle && p->e == 0);
+    cl_command_queue qq = getQueue(q);
+    clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
+    if( sync )
+    {
+        clFinish(qq);
+        if( !cleanupCallback.empty() )
+            cleanupCallback->operator()();
+    }
+    else
+    {
+        p->f = cleanupCallback;
+        p->addref();
+        clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
+    }
+}
+
+
+size_t Kernel::workGroupSize() const
+{
+    if(!p)
+        return 0;
+    size_t val = 0, retsz = 0;
+    cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
+    return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
+                                    sizeof(val), &val, &retsz) >= 0 ? val : 0;
+}
+
+bool Kernel::compileWorkGroupSize(size_t wsz[]) const
+{
+    if(!p || !wsz)
+        return 0;
+    size_t retsz = 0;
+    cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
+    return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
+                                    sizeof(wsz[0])*3, wsz, &retsz) >= 0;
+}
+
+size_t Kernel::localMemSize() const
+{
+    if(!p)
+        return 0;
+    size_t retsz = 0;
+    cl_ulong val = 0;
+    cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
+    return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
+                                    sizeof(val), &val, &retsz) >= 0 ? (size_t)val : 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////
+
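The Program::Impl below supports both building from source and loading a previously stored device binary; a round-trip sketch (not part of the patch), assuming the Program API declared in ocl.hpp:

    using namespace cv::ocl;

    static bool cacheAndReload(const ProgramSource& src, cv::String& errmsg)
    {
        Program prog(src, "", errmsg);        // online compilation
        cv::String bin;
        if( !prog.ptr() || !prog.write(bin) ) // bin = prefix (name/driver/flags) + binary
            return false;
        Program prog2;
        // read() succeeds only if the stored prefix matches the current device
        return prog2.read(bin, "");
    }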
+struct Program::Impl
+{
+    Impl(const ProgramSource& _src,
+         const String& _buildflags, String& errmsg)
+    {
+        refcount = 1;
+        const Context& ctx = Context::getDefault();
+        src = _src;
+        buildflags = _buildflags;
+        const String& srcstr = src.source();
+        const char* srcptr = srcstr.c_str();
+        size_t srclen = srcstr.size();
+        cl_int retval = 0;
+
+        handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
+        if( handle && retval >= 0 )
+        {
+            int i, n = (int)ctx.ndevices();
+            AutoBuffer<void*> deviceListBuf(n+1);
+            void** deviceList = deviceListBuf;
+            for( i = 0; i < n; i++ )
+                deviceList[i] = ctx.device(i).ptr();
+            retval = clBuildProgram(handle, n,
+                                    (const cl_device_id*)deviceList,
+                                    buildflags.c_str(), 0, 0);
+            if( retval == CL_BUILD_PROGRAM_FAILURE )
+            {
+                char buf[1024];
+                size_t retsz = 0;
+                clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], CL_PROGRAM_BUILD_LOG,
+                                      sizeof(buf)-16, buf, &retsz);
+                errmsg = String(buf);
+            }
+        }
+    }
+
+    Impl(const String& _buf, const String& _buildflags)
+    {
+        refcount = 1;
+        handle = 0;
+        buildflags = _buildflags;
+        if(_buf.empty())
+            return;
+        String prefix0 = Program::getPrefix(buildflags);
+        const Context& ctx = Context::getDefault();
+        const Device& dev = Device::getDefault();
+        const char* pos0 = _buf.c_str();
+        const char* pos1 = strchr(pos0, '\n');
+        if(!pos1)
+            return;
+        const char* pos2 = strchr(pos1+1, '\n');
+        if(!pos2)
+            return;
+        const char* pos3 = strchr(pos2+1, '\n');
+        if(!pos3)
+            return;
+        size_t prefixlen = (pos3 - pos0)+1;
+        String prefix(pos0, prefixlen);
+        if( prefix != prefix0 )
+            return;
+        const uchar* bin = (const uchar*)(pos3+1);
+        void* devid = dev.ptr();
+        size_t codelen = _buf.length() - prefixlen;
+        cl_int binstatus = 0, retval = 0;
+        handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
+                                           &codelen, &bin, &binstatus, &retval);
+    }
+
+    String store()
+    {
+        if(!handle)
+            return String();
+        size_t progsz = 0, retsz = 0;
+        String prefix = Program::getPrefix(buildflags);
+        size_t prefixlen = prefix.length();
+        if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) < 0)
+            return String();
+        AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
+        uchar* buf = bufbuf;
+        memcpy(buf, prefix.c_str(), prefixlen);
+        buf += prefixlen;
+        if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) < 0)
+            return String();
+        buf[progsz] = (uchar)'\0';
+        return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
+    }
+
+    ~Impl()
+    {
+        if( handle )
+            clReleaseProgram(handle);
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+
+    ProgramSource src;
+    String buildflags;
+    cl_program handle;
+};
+
+
+Program::Program() { p = 0; }
+
+Program::Program(const ProgramSource& src,
+                 const String& buildflags, String& errmsg)
+{
+    p = 0;
+    create(src, buildflags, errmsg);
+}
+
+Program::Program(const Program& prog)
+{
+    p = prog.p;
+    if(p)
+        p->addref();
+}
+
+Program& Program::operator = (const Program& prog)
+{
+    Impl* newp = (Impl*)prog.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+Program::~Program()
+{
+    if(p)
+        p->release();
+}
+
+bool Program::create(const ProgramSource& src,
+                     const String& buildflags, String& errmsg)
+{
+    if(p)
+        p->release();
+    p = new Impl(src, buildflags, errmsg);
+    if(!p->handle)
+    {
+        p->release();
+        p = 0;
+    }
+    return p != 0;
+}
+
+const ProgramSource& Program::source() const
+{
+    static ProgramSource dummy;
+    return p ? p->src : dummy;
+}
+
+void* Program::ptr() const
+{
+    return p ? p->handle : 0;
+}
+
+bool Program::read(const String& bin, const String& buildflags)
+{
+    if(p)
+        p->release();
+    p = new Impl(bin, buildflags);
+    return p->handle != 0;
+}
+
+bool Program::write(String& bin) const
+{
+    if(!p)
+        return false;
+    bin = p->store();
+    return !bin.empty();
+}
+
+String Program::getPrefix() const
+{
+    if(!p)
+        return String();
+    return getPrefix(p->buildflags);
+}
+
+String Program::getPrefix(const String& buildflags)
+{
+    const Context& ctx = Context::getDefault();
+    const Device& dev = ctx.device(0);
+    return format("name=%s\ndriver=%s\nbuildflags=%s\n",
+                  dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
+}
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+struct ProgramSource::Impl
+{
+    Impl(const char* _src)
+    {
+        init(String(_src));
+    }
+    Impl(const String& _src)
+    {
+        init(_src);
+    }
+    void init(const String& _src)
+    {
+        refcount = 1;
+        src = _src;
+        h = crc64((const uchar*)src.c_str(), src.size());
+    }
+
+    IMPLEMENT_REFCOUNTABLE();
+    String src;
+    ProgramSource::hash_t h;
+};
+
+
+ProgramSource::ProgramSource()
+{
+    p = 0;
+}
+
+ProgramSource::ProgramSource(const char* prog)
+{
+    p = new Impl(prog);
+}
+
+ProgramSource::ProgramSource(const String& prog)
+{
+    p = new Impl(prog);
+}
+
+ProgramSource::~ProgramSource()
+{
+    if(p)
+        p->release();
+}
+
+ProgramSource::ProgramSource(const ProgramSource& prog)
+{
+    p = prog.p;
+    if(p)
+        p->addref();
+}
+
+ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
+{
+    Impl* newp = (Impl*)prog.p;
+    if(newp)
+        newp->addref();
+    if(p)
+        p->release();
+    p = newp;
+    return *this;
+}
+
+const String& ProgramSource::source() const
+{
+    static String dummy;
+    return p ? p->src : dummy;
+}
+
+ProgramSource::hash_t ProgramSource::hash() const
+{
+    return p ?
p->h : 0; +} + +////////////////////////////////////////////////////////////////////////////////////////////// + +class OpenCLAllocator : public MatAllocator +{ +public: + OpenCLAllocator() {} + + UMatData* defaultAllocate(int dims, const int* sizes, int type, size_t* step) const + { + UMatData* u = Mat::getStdAllocator()->allocate(dims, sizes, type, step); + u->urefcount = 1; + u->refcount = 0; + return u; + } + + void getBestFlags(const Context& ctx, int& createFlags, int& flags0) const + { + const Device& dev = ctx.device(0); + createFlags = CL_MEM_READ_WRITE; + + if( dev.hostUnifiedMemory() ) + flags0 = 0; + else + flags0 = UMatData::COPY_ON_MAP; + } + + UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const + { + if(!useOpenCL()) + return defaultAllocate(dims, sizes, type, step); + size_t total = CV_ELEM_SIZE(type); + for( int i = dims-1; i >= 0; i-- ) + { + if( step ) + step[i] = total; + total *= sizes[i]; + } + + Context& ctx = Context::getDefault(); + int createFlags = 0, flags0 = 0; + getBestFlags(ctx, createFlags, flags0); + + cl_int retval = 0; + void* handle = clCreateBuffer((cl_context)ctx.ptr(), + createFlags, total, 0, &retval); + if( !handle || retval < 0 ) + return defaultAllocate(dims, sizes, type, step); + UMatData* u = new UMatData(this); + u->data = 0; + u->size = total; + u->handle = handle; + u->urefcount = 1; + u->flags = flags0; + + return u; + } + + bool allocate(UMatData* u, int accessFlags) const + { + if(!u) + return false; + + UMatDataAutoLock lock(u); + + if(u->handle == 0) + { + CV_Assert(u->origdata != 0); + Context& ctx = Context::getDefault(); + int createFlags = 0, flags0 = 0; + getBestFlags(ctx, createFlags, flags0); + + cl_context ctx_handle = (cl_context)ctx.ptr(); + cl_int retval = 0; + int tempUMatFlags = UMatData::TEMP_UMAT; + u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags, + u->size, u->origdata, &retval); + if((!u->handle || retval < 0) && !(accessFlags & ACCESS_FAST)) + { + u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|createFlags, + u->size, u->origdata, &retval); + tempUMatFlags = UMatData::TEMP_COPIED_UMAT; + } + if(!u->handle || retval < 0) + return false; + u->prevAllocator = u->currAllocator; + u->currAllocator = this; + u->flags |= tempUMatFlags; + } + if(accessFlags & ACCESS_WRITE) + u->markHostCopyObsolete(true); + CV_XADD(&u->urefcount, 1); + return true; + } + + void deallocate(UMatData* u) const + { + if(!u) + return; + + // TODO: !!! 
when we add Shared Virtual Memory Support,
+    // this function (as well as the others) should be corrected
+    CV_Assert(u->handle != 0 && u->urefcount == 0);
+    if(u->tempUMat())
+    {
+        if( u->hostCopyObsolete() && u->refcount > 0 && u->tempCopiedUMat() )
+        {
+            // sync the user-visible host copy before the device buffer is released
+            clEnqueueReadBuffer((cl_command_queue)Queue::getDefault().ptr(),
+                                (cl_mem)u->handle, CL_TRUE, 0,
+                                u->size, u->origdata, 0, 0, 0);
+        }
+        u->markHostCopyObsolete(false);
+        clReleaseMemObject((cl_mem)u->handle);
+        u->currAllocator = u->prevAllocator;
+        if(u->refcount == 0)
+            u->currAllocator->deallocate(u);
+    }
+    else
+    {
+        if(u->data && u->copyOnMap())
+            fastFree(u->data);
+        clReleaseMemObject((cl_mem)u->handle);
+        delete u;
+    }
+}
+
+    void map(UMatData* u, int accessFlags) const
+    {
+        if(!u)
+            return;
+
+        CV_Assert( u->handle != 0 );
+
+        UMatDataAutoLock autolock(u);
+
+        if(accessFlags & ACCESS_WRITE)
+            u->markDeviceCopyObsolete(true);
+
+        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+        if( u->refcount == 0 )
+        {
+            if( !u->copyOnMap() )
+            {
+                CV_Assert(u->data == 0);
+                // because there can be other map requests for the same UMat with different access flags,
+                // we use the universal (read-write) access mode.
+                cl_int retval = 0;
+                u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
+                                                     (CL_MAP_READ | CL_MAP_WRITE),
+                                                     0, u->size, 0, 0, 0, &retval);
+                if(u->data && retval >= 0)
+                {
+                    u->markHostCopyObsolete(false);
+                    return;
+                }
+
+                // if the map failed, switch to the copy-on-map mode for this buffer
+                u->flags |= UMatData::COPY_ON_MAP;
+            }
+
+            if(!u->data)
+            {
+                u->data = (uchar*)fastMalloc(u->size);
+                u->markHostCopyObsolete(true);
+            }
+        }
+
+        if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
+        {
+            CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+                                           u->size, u->data, 0, 0, 0) >= 0 );
+            u->markHostCopyObsolete(false);
+        }
+    }
+
+    void unmap(UMatData* u) const
+    {
+        if(!u)
+            return;
+
+        CV_Assert(u->handle != 0);
+
+        UMatDataAutoLock autolock(u);
+
+        cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+        if( !u->copyOnMap() && u->data )
+        {
+            CV_Assert( clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0) >= 0 );
+            u->data = 0;
+        }
+        else if( u->copyOnMap() && u->deviceCopyObsolete() )
+        {
+            CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+                                            u->size, u->data, 0, 0, 0) >= 0 );
+        }
+        u->markDeviceCopyObsolete(false);
+        u->markHostCopyObsolete(false);
+    }
+
+    bool checkContinuous(int dims, const size_t sz[],
+                         const size_t srcofs[], const size_t srcstep[],
+                         const size_t dstofs[], const size_t dststep[],
+                         size_t& total, size_t new_sz[],
+                         size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
+                         size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
+    {
+        bool iscontinuous = true;
+        srcrawofs = srcofs ? srcofs[dims-1] : 0;
+        dstrawofs = dstofs ? dstofs[dims-1] : 0;
+        total = sz[dims-1];
+        for( int i = dims-2; i >= 0; i-- )
+        {
+            if( i > 0 && (total != srcstep[i] || total != dststep[i]) )
+                iscontinuous = false;
+            total *= sz[i];
+            if( srcofs )
+                srcrawofs += srcofs[i]*srcstep[i];
+            if( dstofs )
+                dstrawofs += dstofs[i]*dststep[i];
+        }
+
+        if( !iscontinuous )
+        {
+            // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
+            if( dims == 2 )
+            {
+                new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
+                // we assume that new_...
arrays are initialized by caller + // with 0's, so there is no else branch + if( srcofs ) + { + new_srcofs[0] = srcofs[1]; + new_srcofs[1] = srcofs[0]; + new_srcofs[2] = 0; + } + + if( dstofs ) + { + new_dstofs[0] = dstofs[1]; + new_dstofs[1] = dstofs[0]; + new_dstofs[2] = 0; + } + + new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0; + new_dststep[0] = dststep[0]; new_dststep[1] = 0; + } + else + { + // we could check for dims == 3 here, + // but from user perspective this one is more informative + CV_Assert(dims <= 3); + new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0]; + if( srcofs ) + { + new_srcofs[0] = srcofs[2]; + new_srcofs[1] = srcofs[1]; + new_srcofs[2] = srcofs[0]; + } + + if( dstofs ) + { + new_dstofs[0] = dstofs[2]; + new_dstofs[1] = dstofs[1]; + new_dstofs[2] = dstofs[0]; + } + + new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0]; + new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0]; + } + } + return iscontinuous; + } + + void download(UMatData* u, void* dstptr, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dststep[]) const + { + if(!u) + return; + UMatDataAutoLock autolock(u); + + if( u->data && !u->hostCopyObsolete() ) + { + Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep); + return; + } + CV_Assert( u->handle != 0 ); + + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + size_t total = 0, new_sz[] = {0, 0, 0}; + size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; + size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; + + bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep, + total, new_sz, + srcrawofs, new_srcofs, new_srcstep, + dstrawofs, new_dstofs, new_dststep); + if( iscontinuous ) + { + CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, + srcrawofs, total, dstptr, 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE, + new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1], + new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) >= 0 ); + } + clFinish(q); + } + + void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[], + const size_t dstofs[], const size_t dststep[], + const size_t srcstep[]) const + { + if(!u) + return; + + // there should be no user-visible CPU copies of the UMat which we are going to copy to + CV_Assert(u->refcount == 0); + + size_t total = 0, new_sz[] = {0, 0, 0}; + size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; + size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; + + bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep, + total, new_sz, + srcrawofs, new_srcofs, new_srcstep, + dstrawofs, new_dstofs, new_dststep); + + UMatDataAutoLock autolock(u); + + // if there is cached CPU copy of the GPU matrix, + // we could use it as a destination. + // we can do it in 2 cases: + // 1. we overwrite the whole content + // 2. 
we overwrite part of the matrix, but the GPU copy is out-of-date + if( u->data && (u->hostCopyObsolete() <= u->deviceCopyObsolete() || total == u->size)) + { + Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep); + u->markHostCopyObsolete(false); + u->markDeviceCopyObsolete(true); + return; + } + + CV_Assert( u->handle != 0 ); + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + if( iscontinuous ) + { + CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, + CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE, + new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1], + new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) >= 0 ); + } + + u->markHostCopyObsolete(true); + u->markDeviceCopyObsolete(false); + + clFinish(q); + } + + void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dstofs[], const size_t dststep[], bool sync) const + { + if(!src || !dst) + return; + + size_t total = 0, new_sz[] = {0, 0, 0}; + size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; + size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; + + bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep, + total, new_sz, + srcrawofs, new_srcofs, new_srcstep, + dstrawofs, new_dstofs, new_dststep); + + UMatDataAutoLock src_autolock(src); + UMatDataAutoLock dst_autolock(dst); + + if( !src->handle || (src->data && src->hostCopyObsolete() <= src->deviceCopyObsolete()) ) + { + upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep); + return; + } + if( !dst->handle || (dst->data && dst->hostCopyObsolete() <= dst->deviceCopyObsolete()) ) + { + download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep); + dst->markHostCopyObsolete(false); + dst->markDeviceCopyObsolete(true); + return; + } + + // there should be no user-visible CPU copies of the UMat which we are going to copy to + CV_Assert(dst->refcount == 0); + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + if( iscontinuous ) + { + CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle, + srcrawofs, dstrawofs, total, 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, + new_srcofs, new_dstofs, new_sz, + new_srcstep[0], new_srcstep[1], new_dststep[0], new_dststep[1], + 0, 0, 0) >= 0 ); + } + + dst->markHostCopyObsolete(true); + dst->markDeviceCopyObsolete(false); + + if( sync ) + clFinish(q); + } +}; + +MatAllocator* getOpenCLAllocator() +{ + static OpenCLAllocator allocator; + return &allocator; +} + +}} + diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp new file mode 100644 index 0000000..4ed0e51 --- /dev/null +++ b/modules/core/src/umatrix.cpp @@ -0,0 +1,643 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
+// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +///////////////////////////////// UMat implementation /////////////////////////////// + +namespace cv { + +// it should be a prime number for the best hash function +enum { UMAT_NLOCKS = 31 }; +static Mutex umatLocks[UMAT_NLOCKS]; + +UMatData::UMatData(const MatAllocator* allocator) +{ + prevAllocator = currAllocator = allocator; + urefcount = refcount = 0; + data = origdata = 0; + size = 0; + flags = 0; + handle = 0; +} + +void UMatData::lock() +{ + umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock(); +} + +void UMatData::unlock() +{ + umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock(); +} + + +MatAllocator* UMat::getStdAllocator() +{ + return ocl::getOpenCLAllocator(); +} + +void swap( UMat& a, UMat& b ) +{ + std::swap(a.flags, b.flags); + std::swap(a.dims, b.dims); + std::swap(a.rows, b.rows); + std::swap(a.cols, b.cols); + std::swap(a.allocator, b.allocator); + std::swap(a.u, b.u); + std::swap(a.offset, b.offset); + + std::swap(a.size.p, b.size.p); + std::swap(a.step.p, b.step.p); + std::swap(a.step.buf[0], b.step.buf[0]); + std::swap(a.step.buf[1], b.step.buf[1]); + + if( a.step.p == b.step.buf ) + { + a.step.p = a.step.buf; + a.size.p = &a.rows; + } + + if( b.step.p == a.step.buf ) + { + b.step.p = b.step.buf; + b.size.p = &b.rows; + } +} + + +static inline void setSize( UMat& m, int _dims, const int* _sz, + const size_t* _steps, bool autoSteps=false ) +{ + CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM ); + if( m.dims != _dims ) + { + if( m.step.p != m.step.buf ) + { + fastFree(m.step.p); + m.step.p = m.step.buf; + m.size.p = &m.rows; + } + if( _dims > 2 ) + { + m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0])); + m.size.p = (int*)(m.step.p + _dims) + 1; + m.size.p[-1] = _dims; + m.rows = m.cols = -1; + } + } + + m.dims = _dims; + if( !_sz ) + return; + + size_t esz = CV_ELEM_SIZE(m.flags), total = esz; + int i; + for( i = _dims-1; i >= 0; i-- ) + { + int s = _sz[i]; + CV_Assert( s 
>= 0 );
+        m.size.p[i] = s;
+
+        if( _steps )
+            m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
+        else if( autoSteps )
+        {
+            m.step.p[i] = total;
+            int64 total1 = (int64)total*s;
+            if( (uint64)total1 != (size_t)total1 )
+                CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
+            total = (size_t)total1;
+        }
+    }
+
+    if( _dims == 1 )
+    {
+        m.dims = 2;
+        m.cols = 1;
+        m.step[1] = esz;
+    }
+}
+
+static void updateContinuityFlag(UMat& m)
+{
+    int i, j;
+    for( i = 0; i < m.dims; i++ )
+    {
+        if( m.size[i] > 1 )
+            break;
+    }
+
+    for( j = m.dims-1; j > i; j-- )
+    {
+        if( m.step[j]*m.size[j] < m.step[j-1] )
+            break;
+    }
+
+    uint64 t = (uint64)m.step[0]*m.size[0];
+    if( j <= i && t == (size_t)t )
+        m.flags |= UMat::CONTINUOUS_FLAG;
+    else
+        m.flags &= ~UMat::CONTINUOUS_FLAG;
+}
+
+
+static void finalizeHdr(UMat& m)
+{
+    updateContinuityFlag(m);
+    int d = m.dims;
+    if( d > 2 )
+        m.rows = m.cols = -1;
+}
+
+
+UMat Mat::getUMat(int accessFlags) const
+{
+    UMat hdr;
+    if(!u)
+        return hdr;
+    UMat::getStdAllocator()->allocate(u, accessFlags);
+    hdr.flags = flags;
+    setSize(hdr, dims, size.p, step.p);
+    finalizeHdr(hdr);
+    hdr.u = u;
+    hdr.offset = data - datastart;
+    return hdr;
+}
+
+void UMat::create(int d, const int* _sizes, int _type)
+{
+    int i;
+    CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
+    _type = CV_MAT_TYPE(_type);
+
+    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
+    {
+        if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
+            return;
+        for( i = 0; i < d; i++ )
+            if( size[i] != _sizes[i] )
+                break;
+        if( i == d && (d > 1 || size[1] == 1))
+            return;
+    }
+
+    release();
+    if( d == 0 )
+        return;
+    flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
+    setSize(*this, d, _sizes, 0, true);
+    offset = 0;
+
+    if( total() > 0 )
+    {
+        MatAllocator *a = allocator, *a0 = getStdAllocator();
+        if(!a)
+            a = a0;
+        try
+        {
+            u = a->allocate(dims, size, _type, step.p);
+            CV_Assert(u != 0);
+        }
+        catch(...)
+        {
+            if(a != a0)
+                u = a0->allocate(dims, size, _type, step.p);
+            CV_Assert(u != 0);
+        }
+        CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
+    }
+
+    finalizeHdr(*this);
+}
+
+void UMat::copySize(const UMat& m)
+{
+    setSize(*this, m.dims, 0, 0);
+    for( int i = 0; i < dims; i++ )
+    {
+        size[i] = m.size[i];
+        step[i] = m.step[i];
+    }
+}
+
+void UMat::deallocate()
+{
+    u->currAllocator->deallocate(u);
+}
+
+
+UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+    CV_Assert( m.dims >= 2 );
+    if( m.dims > 2 )
+    {
+        AutoBuffer<Range> rs(m.dims);
+        rs[0] = _rowRange;
+        rs[1] = _colRange;
+        for( int i = 2; i < m.dims; i++ )
+            rs[i] = Range::all();
+        *this = m(rs);
+        return;
+    }
+
+    *this = m;
+    if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
+    {
+        CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
+        rows = _rowRange.size();
+        offset += step*_rowRange.start;
+        flags |= SUBMATRIX_FLAG;
+    }
+
+    if( _colRange != Range::all() && _colRange != Range(0,cols) )
+    {
+        CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
+        cols = _colRange.size();
+        offset += _colRange.start*elemSize();
+        flags &= cols < m.cols ?
~CONTINUOUS_FLAG : -1; + flags |= SUBMATRIX_FLAG; + } + + if( rows == 1 ) + flags |= CONTINUOUS_FLAG; + + if( rows <= 0 || cols <= 0 ) + { + release(); + rows = cols = 0; + } +} + + +UMat::UMat(const UMat& m, const Rect& roi) + : flags(m.flags), dims(2), rows(roi.height), cols(roi.width), + allocator(m.allocator), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows) +{ + CV_Assert( m.dims <= 2 ); + flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1; + flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0; + + size_t esz = CV_ELEM_SIZE(flags); + offset += roi.x*esz; + CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && + 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows ); + if( u ) + CV_XADD(&(u->urefcount), 1); + if( roi.width < m.cols || roi.height < m.rows ) + flags |= SUBMATRIX_FLAG; + + step[0] = m.step[0]; step[1] = esz; + + if( rows <= 0 || cols <= 0 ) + { + release(); + rows = cols = 0; + } +} + + +UMat::UMat(const UMat& m, const Range* ranges) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows) +{ + int i, d = m.dims; + + CV_Assert(ranges); + for( i = 0; i < d; i++ ) + { + Range r = ranges[i]; + CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) ); + } + *this = m; + for( i = 0; i < d; i++ ) + { + Range r = ranges[i]; + if( r != Range::all() && r != Range(0, size.p[i])) + { + size.p[i] = r.end - r.start; + offset += r.start*step.p[i]; + flags |= SUBMATRIX_FLAG; + } + } + updateContinuityFlag(*this); +} + +UMat UMat::diag(int d) const +{ + CV_Assert( dims <= 2 ); + UMat m = *this; + size_t esz = elemSize(); + int len; + + if( d >= 0 ) + { + len = std::min(cols - d, rows); + m.offset += esz*d; + } + else + { + len = std::min(rows + d, cols); + m.offset -= step[0]*d; + } + CV_DbgAssert( len > 0 ); + + m.size[0] = m.rows = len; + m.size[1] = m.cols = 1; + m.step[0] += (len > 1 ? 
esz : 0); + + if( m.rows > 1 ) + m.flags &= ~CONTINUOUS_FLAG; + else + m.flags |= CONTINUOUS_FLAG; + + if( size() != Size(1,1) ) + m.flags |= SUBMATRIX_FLAG; + + return m; +} + +void UMat::locateROI( Size& wholeSize, Point& ofs ) const +{ + CV_Assert( dims <= 2 && step[0] > 0 ); + size_t esz = elemSize(), minstep; + ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size; + + if( delta1 == 0 ) + ofs.x = ofs.y = 0; + else + { + ofs.y = (int)(delta1/step[0]); + ofs.x = (int)((delta1 - step[0]*ofs.y)/esz); + CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) ); + } + minstep = (ofs.x + cols)*esz; + wholeSize.height = (int)((delta2 - minstep)/step[0] + 1); + wholeSize.height = std::max(wholeSize.height, ofs.y + rows); + wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz); + wholeSize.width = std::max(wholeSize.width, ofs.x + cols); +} + + +UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright ) +{ + CV_Assert( dims <= 2 && step[0] > 0 ); + Size wholeSize; Point ofs; + size_t esz = elemSize(); + locateROI( wholeSize, ofs ); + int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height); + int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width); + offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz; + rows = row2 - row1; cols = col2 - col1; + size.p[0] = rows; size.p[1] = cols; + if( esz*cols == step[0] || rows == 1 ) + flags |= CONTINUOUS_FLAG; + else + flags &= ~CONTINUOUS_FLAG; + return *this; +} + + +UMat UMat::reshape(int new_cn, int new_rows) const +{ + int cn = channels(); + UMat hdr = *this; + + if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 ) + { + hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT); + hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags); + hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn; + return hdr; + } + + CV_Assert( dims <= 2 ); + + if( new_cn == 0 ) + new_cn = cn; + + int total_width = cols * cn; + + if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 ) + new_rows = rows * total_width / new_cn; + + if( new_rows != 0 && new_rows != rows ) + { + int total_size = total_width * rows; + if( !isContinuous() ) + CV_Error( CV_BadStep, + "The matrix is not continuous, thus its number of rows can not be changed" ); + + if( (unsigned)new_rows > (unsigned)total_size ) + CV_Error( CV_StsOutOfRange, "Bad new number of rows" ); + + total_width = total_size / new_rows; + + if( total_width * new_rows != total_size ) + CV_Error( CV_StsBadArg, "The total number of matrix elements " + "is not divisible by the new number of rows" ); + + hdr.rows = new_rows; + hdr.step[0] = total_width * elemSize1(); + } + + int new_width = total_width / new_cn; + + if( new_width * new_cn != total_width ) + CV_Error( CV_BadNumChannels, + "The total width is not divisible by the new number of channels" ); + + hdr.cols = new_width; + hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT); + hdr.step[1] = CV_ELEM_SIZE(hdr.flags); + return hdr; +} + +UMat UMat::diag(const UMat& d) +{ + CV_Assert( d.cols == 1 || d.rows == 1 ); + int len = d.rows + d.cols - 1; + UMat m(len, len, d.type(), Scalar(0)); + UMat md = m.diag(); + if( d.cols == 1 ) + d.copyTo(md); + else + transpose(d, md); + return m; +} + +int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const +{ + return (depth() == _depth || _depth <= 0) && + (isContinuous() || !_requireContinuous) && + ((dims == 2 && (((rows == 1 || 
cols == 1) && channels() == _elemChannels) ||
+             (cols == _elemChannels && channels() == 1))) ||
+        (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
+         (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
+    ? (int)(total()*channels()/_elemChannels) : -1;
+}
+
+
+UMat UMat::cross(InputArray) const
+{
+    CV_Error(CV_StsNotImplemented, "");
+    return UMat();
+}
+
+
+UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
+{
+    if(_newndims == dims)
+    {
+        if(_newsz == 0)
+            return reshape(_cn);
+        if(_newndims == 2)
+            return reshape(_cn, _newsz[0]);
+    }
+
+    CV_Error(CV_StsNotImplemented, "");
+    // TBD
+    return UMat();
+}
+
+
+Mat UMat::getMat(int accessFlags) const
+{
+    if(!u)
+        return Mat();
+    u->currAllocator->map(u, accessFlags);
+    CV_Assert(u->data != 0);
+    Mat hdr(dims, size.p, type(), u->data + offset, step.p);
+    hdr.refcount = &u->refcount;
+    hdr.u = u;
+    hdr.datastart = u->data;
+    hdr.datalimit = hdr.dataend = u->data + u->size;
+    CV_XADD(hdr.refcount, 1);
+    return hdr;
+}
+
+void* UMat::handle(int accessFlags) const
+{
+    if( !u )
+        return 0;
+
+    // check the flags: if the CPU copy is newer, copy it back to the GPU.
+    if( u->deviceCopyObsolete() )
+    {
+        CV_Assert(u->refcount == 0);
+        u->currAllocator->unmap(u);
+    }
+    else if( u->refcount > 0 && (accessFlags & ACCESS_WRITE) )
+    {
+        CV_Error(Error::StsError,
+                 "it's not allowed to access UMat handle for writing "
+                 "while it's mapped; call Mat::release() first for all its mappings");
+    }
+    return u->handle;
+}
+
+void UMat::ndoffset(size_t* ofs) const
+{
+    // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
+    size_t t = offset;
+    for( int i = 0; i < dims; i++ )
+    {
+        size_t s = step.p[i];
+        ofs[i] = t / s;
+        t -= ofs[i]*s;
+    }
+}
+
+void UMat::copyTo(OutputArray _dst) const
+{
+    int dtype = _dst.type();
+    if( _dst.fixedType() && dtype != type() )
+    {
+        CV_Assert( channels() == CV_MAT_CN(dtype) );
+        convertTo( _dst, dtype );
+        return;
+    }
+
+    if( empty() )
+    {
+        _dst.release();
+        return;
+    }
+
+    size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM];
+    for( i = 0; i < (size_t)dims; i++ )
+        sz[i] = size.p[i];
+    sz[dims-1] *= elemSize();
+    ndoffset(srcofs);
+
+    _dst.create( dims, size, type() );
+    if( _dst.kind() == _InputArray::UMAT )
+    {
+        UMat dst = _dst.getUMat();
+        void* srchandle = handle(ACCESS_READ);
+        void* dsthandle = dst.handle(ACCESS_WRITE);
+        if( srchandle == dsthandle && dst.offset == offset )
+            return;
+        dst.ndoffset(dstofs);
+        CV_Assert(u->currAllocator == dst.u->currAllocator);
+        u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
+    }
+    else
+    {
+        Mat dst = _dst.getMat();
+        u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p);
+    }
+}
+
+void UMat::convertTo(OutputArray, int, double, double) const
+{
+    CV_Error(Error::StsNotImplemented, "");
+}
+
+UMat& UMat::operator = (const Scalar&)
+{
+    CV_Error(Error::StsNotImplemented, "");
+    return *this;
+}
+
+}
+
+/* End of file. */
-- 
2.7.4
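As an end-to-end illustration (not part of the patch) of the new transparent API, a sketch of the intended Mat/UMat interplay, using only methods defined in this file:

    #include "opencv2/core.hpp"

    static void roundTrip()
    {
        cv::Mat m(480, 640, CV_8UC1, cv::Scalar::all(0));
        // wrap the Mat data; the OpenCL allocator attaches a cl_mem buffer to it
        cv::UMat um = m.getUMat(cv::ACCESS_READ);
        cv::UMat um2;
        um.copyTo(um2);                           // device-side clEnqueueCopyBuffer when possible
        cv::Mat m2 = um2.getMat(cv::ACCESS_READ); // maps or downloads for host access
    }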