1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
47 #include <iostream> // std::cerr
49 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
51 #include "opencv2/core/bufferpool.hpp"
52 #ifndef LOG_BUFFER_POOL
54 # define LOG_BUFFER_POOL printf
56 # define LOG_BUFFER_POOL(...)
60 // TODO Move to some common place
61 static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
64 const char* envValue = NULL;
66 const char* envValue = getenv(name);
72 cv::String value = envValue;
74 for (; pos < value.size(); pos++)
76 if (!isdigit(value[pos]))
79 cv::String valueStr = value.substr(0, pos);
80 cv::String suffixStr = value.substr(pos, value.length() - pos);
81 int v = atoi(valueStr.c_str());
82 if (suffixStr.length() == 0)
84 else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
85 return v * 1024 * 1024;
86 else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
88 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
91 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
92 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
95 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
97 // TODO FIXIT: This file can't be build without OPENCL
100 Part of the file is an extract from the standard OpenCL headers from Khronos site.
101 Below is the original copyright.
104 /*******************************************************************************
105 * Copyright (c) 2008 - 2012 The Khronos Group Inc.
107 * Permission is hereby granted, free of charge, to any person obtaining a
108 * copy of this software and/or associated documentation files (the
109 * "Materials"), to deal in the Materials without restriction, including
110 * without limitation the rights to use, copy, modify, merge, publish,
111 * distribute, sublicense, and/or sell copies of the Materials, and to
112 * permit persons to whom the Materials are furnished to do so, subject to
113 * the following conditions:
115 * The above copyright notice and this permission notice shall be included
116 * in all copies or substantial portions of the Materials.
118 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
119 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
120 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
121 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
122 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
123 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
124 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
125 ******************************************************************************/
127 #if 0 //defined __APPLE__
128 #define HAVE_OPENCL 1
133 #define OPENCV_CL_NOT_IMPLEMENTED -1000
137 #if defined __APPLE__
138 #include <OpenCL/opencl.h>
140 #include <CL/opencl.h>
143 static const bool g_haveOpenCL = true;
149 struct _cl_platform_id { int dummy; };
150 struct _cl_device_id { int dummy; };
151 struct _cl_context { int dummy; };
152 struct _cl_command_queue { int dummy; };
153 struct _cl_mem { int dummy; };
154 struct _cl_program { int dummy; };
155 struct _cl_kernel { int dummy; };
156 struct _cl_event { int dummy; };
157 struct _cl_sampler { int dummy; };
159 typedef struct _cl_platform_id * cl_platform_id;
160 typedef struct _cl_device_id * cl_device_id;
161 typedef struct _cl_context * cl_context;
162 typedef struct _cl_command_queue * cl_command_queue;
163 typedef struct _cl_mem * cl_mem;
164 typedef struct _cl_program * cl_program;
165 typedef struct _cl_kernel * cl_kernel;
166 typedef struct _cl_event * cl_event;
167 typedef struct _cl_sampler * cl_sampler;
170 typedef unsigned cl_uint;
171 #if defined (_WIN32) && defined(_MSC_VER)
172 typedef __int64 cl_long;
173 typedef unsigned __int64 cl_ulong;
175 typedef long cl_long;
176 typedef unsigned long cl_ulong;
179 typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
180 typedef cl_ulong cl_bitfield;
181 typedef cl_bitfield cl_device_type;
182 typedef cl_uint cl_platform_info;
183 typedef cl_uint cl_device_info;
184 typedef cl_bitfield cl_device_fp_config;
185 typedef cl_uint cl_device_mem_cache_type;
186 typedef cl_uint cl_device_local_mem_type;
187 typedef cl_bitfield cl_device_exec_capabilities;
188 typedef cl_bitfield cl_command_queue_properties;
189 typedef intptr_t cl_device_partition_property;
190 typedef cl_bitfield cl_device_affinity_domain;
192 typedef intptr_t cl_context_properties;
193 typedef cl_uint cl_context_info;
194 typedef cl_uint cl_command_queue_info;
195 typedef cl_uint cl_channel_order;
196 typedef cl_uint cl_channel_type;
197 typedef cl_bitfield cl_mem_flags;
198 typedef cl_uint cl_mem_object_type;
199 typedef cl_uint cl_mem_info;
200 typedef cl_bitfield cl_mem_migration_flags;
201 typedef cl_uint cl_image_info;
202 typedef cl_uint cl_buffer_create_type;
203 typedef cl_uint cl_addressing_mode;
204 typedef cl_uint cl_filter_mode;
205 typedef cl_uint cl_sampler_info;
206 typedef cl_bitfield cl_map_flags;
207 typedef cl_uint cl_program_info;
208 typedef cl_uint cl_program_build_info;
209 typedef cl_uint cl_program_binary_type;
210 typedef cl_int cl_build_status;
211 typedef cl_uint cl_kernel_info;
212 typedef cl_uint cl_kernel_arg_info;
213 typedef cl_uint cl_kernel_arg_address_qualifier;
214 typedef cl_uint cl_kernel_arg_access_qualifier;
215 typedef cl_bitfield cl_kernel_arg_type_qualifier;
216 typedef cl_uint cl_kernel_work_group_info;
217 typedef cl_uint cl_event_info;
218 typedef cl_uint cl_command_type;
219 typedef cl_uint cl_profiling_info;
222 typedef struct _cl_image_format {
223 cl_channel_order image_channel_order;
224 cl_channel_type image_channel_data_type;
227 typedef struct _cl_image_desc {
228 cl_mem_object_type image_type;
232 size_t image_array_size;
233 size_t image_row_pitch;
234 size_t image_slice_pitch;
235 cl_uint num_mip_levels;
240 typedef struct _cl_buffer_region {
246 //////////////////////////////////////////////////////////
249 #define CL_DEVICE_NOT_FOUND -1
250 #define CL_DEVICE_NOT_AVAILABLE -2
251 #define CL_COMPILER_NOT_AVAILABLE -3
252 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
253 #define CL_OUT_OF_RESOURCES -5
254 #define CL_OUT_OF_HOST_MEMORY -6
255 #define CL_PROFILING_INFO_NOT_AVAILABLE -7
256 #define CL_MEM_COPY_OVERLAP -8
257 #define CL_IMAGE_FORMAT_MISMATCH -9
258 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
259 #define CL_BUILD_PROGRAM_FAILURE -11
260 #define CL_MAP_FAILURE -12
261 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
262 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
263 #define CL_COMPILE_PROGRAM_FAILURE -15
264 #define CL_LINKER_NOT_AVAILABLE -16
265 #define CL_LINK_PROGRAM_FAILURE -17
266 #define CL_DEVICE_PARTITION_FAILED -18
267 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
269 #define CL_INVALID_VALUE -30
270 #define CL_INVALID_DEVICE_TYPE -31
271 #define CL_INVALID_PLATFORM -32
272 #define CL_INVALID_DEVICE -33
273 #define CL_INVALID_CONTEXT -34
274 #define CL_INVALID_QUEUE_PROPERTIES -35
275 #define CL_INVALID_COMMAND_QUEUE -36
276 #define CL_INVALID_HOST_PTR -37
277 #define CL_INVALID_MEM_OBJECT -38
278 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
279 #define CL_INVALID_IMAGE_SIZE -40
280 #define CL_INVALID_SAMPLER -41
281 #define CL_INVALID_BINARY -42
282 #define CL_INVALID_BUILD_OPTIONS -43
283 #define CL_INVALID_PROGRAM -44
284 #define CL_INVALID_PROGRAM_EXECUTABLE -45
285 #define CL_INVALID_KERNEL_NAME -46
286 #define CL_INVALID_KERNEL_DEFINITION -47
287 #define CL_INVALID_KERNEL -48
288 #define CL_INVALID_ARG_INDEX -49
289 #define CL_INVALID_ARG_VALUE -50
290 #define CL_INVALID_ARG_SIZE -51
291 #define CL_INVALID_KERNEL_ARGS -52
292 #define CL_INVALID_WORK_DIMENSION -53
293 #define CL_INVALID_WORK_GROUP_SIZE -54
294 #define CL_INVALID_WORK_ITEM_SIZE -55
295 #define CL_INVALID_GLOBAL_OFFSET -56
296 #define CL_INVALID_EVENT_WAIT_LIST -57
297 #define CL_INVALID_EVENT -58
298 #define CL_INVALID_OPERATION -59
299 #define CL_INVALID_GL_OBJECT -60
300 #define CL_INVALID_BUFFER_SIZE -61
301 #define CL_INVALID_MIP_LEVEL -62
302 #define CL_INVALID_GLOBAL_WORK_SIZE -63
303 #define CL_INVALID_PROPERTY -64
304 #define CL_INVALID_IMAGE_DESCRIPTOR -65
305 #define CL_INVALID_COMPILER_OPTIONS -66
306 #define CL_INVALID_LINKER_OPTIONS -67
307 #define CL_INVALID_DEVICE_PARTITION_COUNT -68
309 /*#define CL_VERSION_1_0 1
310 #define CL_VERSION_1_1 1
311 #define CL_VERSION_1_2 1*/
315 #define CL_BLOCKING CL_TRUE
316 #define CL_NON_BLOCKING CL_FALSE
318 #define CL_PLATFORM_PROFILE 0x0900
319 #define CL_PLATFORM_VERSION 0x0901
320 #define CL_PLATFORM_NAME 0x0902
321 #define CL_PLATFORM_VENDOR 0x0903
322 #define CL_PLATFORM_EXTENSIONS 0x0904
324 #define CL_DEVICE_TYPE_DEFAULT (1 << 0)
325 #define CL_DEVICE_TYPE_CPU (1 << 1)
326 #define CL_DEVICE_TYPE_GPU (1 << 2)
327 #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
328 #define CL_DEVICE_TYPE_CUSTOM (1 << 4)
329 #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
330 #define CL_DEVICE_TYPE 0x1000
331 #define CL_DEVICE_VENDOR_ID 0x1001
332 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
333 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
334 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
335 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
336 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
337 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
338 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
339 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
340 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
341 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
342 #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
343 #define CL_DEVICE_ADDRESS_BITS 0x100D
344 #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
345 #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
346 #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
347 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
348 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
349 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
350 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
351 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
352 #define CL_DEVICE_IMAGE_SUPPORT 0x1016
353 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
354 #define CL_DEVICE_MAX_SAMPLERS 0x1018
355 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
356 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
357 #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
358 #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
359 #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
360 #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
361 #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
362 #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
363 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
364 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
365 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
366 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
367 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
368 #define CL_DEVICE_ENDIAN_LITTLE 0x1026
369 #define CL_DEVICE_AVAILABLE 0x1027
370 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028
371 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
372 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A
373 #define CL_DEVICE_NAME 0x102B
374 #define CL_DEVICE_VENDOR 0x102C
375 #define CL_DRIVER_VERSION 0x102D
376 #define CL_DEVICE_PROFILE 0x102E
377 #define CL_DEVICE_VERSION 0x102F
378 #define CL_DEVICE_EXTENSIONS 0x1030
379 #define CL_DEVICE_PLATFORM 0x1031
380 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
381 #define CL_DEVICE_HALF_FP_CONFIG 0x1033
382 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
383 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
384 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
385 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
386 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
387 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
388 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
389 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
390 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
391 #define CL_DEVICE_OPENCL_C_VERSION 0x103D
392 #define CL_DEVICE_LINKER_AVAILABLE 0x103E
393 #define CL_DEVICE_BUILT_IN_KERNELS 0x103F
394 #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
395 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
396 #define CL_DEVICE_PARENT_DEVICE 0x1042
397 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
398 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044
399 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
400 #define CL_DEVICE_PARTITION_TYPE 0x1046
401 #define CL_DEVICE_REFERENCE_COUNT 0x1047
402 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
403 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
404 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
405 #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
407 #define CL_FP_DENORM (1 << 0)
408 #define CL_FP_INF_NAN (1 << 1)
409 #define CL_FP_ROUND_TO_NEAREST (1 << 2)
410 #define CL_FP_ROUND_TO_ZERO (1 << 3)
411 #define CL_FP_ROUND_TO_INF (1 << 4)
412 #define CL_FP_FMA (1 << 5)
413 #define CL_FP_SOFT_FLOAT (1 << 6)
414 #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
417 #define CL_READ_ONLY_CACHE 0x1
418 #define CL_READ_WRITE_CACHE 0x2
420 #define CL_GLOBAL 0x2
421 #define CL_EXEC_KERNEL (1 << 0)
422 #define CL_EXEC_NATIVE_KERNEL (1 << 1)
423 #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
424 #define CL_QUEUE_PROFILING_ENABLE (1 << 1)
426 #define CL_CONTEXT_REFERENCE_COUNT 0x1080
427 #define CL_CONTEXT_DEVICES 0x1081
428 #define CL_CONTEXT_PROPERTIES 0x1082
429 #define CL_CONTEXT_NUM_DEVICES 0x1083
430 #define CL_CONTEXT_PLATFORM 0x1084
431 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
433 #define CL_DEVICE_PARTITION_EQUALLY 0x1086
434 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
435 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
436 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
437 #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
438 #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
439 #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
440 #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
441 #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
442 #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
443 #define CL_QUEUE_CONTEXT 0x1090
444 #define CL_QUEUE_DEVICE 0x1091
445 #define CL_QUEUE_REFERENCE_COUNT 0x1092
446 #define CL_QUEUE_PROPERTIES 0x1093
447 #define CL_MEM_READ_WRITE (1 << 0)
448 #define CL_MEM_WRITE_ONLY (1 << 1)
449 #define CL_MEM_READ_ONLY (1 << 2)
450 #define CL_MEM_USE_HOST_PTR (1 << 3)
451 #define CL_MEM_ALLOC_HOST_PTR (1 << 4)
452 #define CL_MEM_COPY_HOST_PTR (1 << 5)
454 #define CL_MEM_HOST_WRITE_ONLY (1 << 7)
455 #define CL_MEM_HOST_READ_ONLY (1 << 8)
456 #define CL_MEM_HOST_NO_ACCESS (1 << 9)
457 #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
458 #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
464 #define CL_RGB 0x10B4
465 #define CL_RGBA 0x10B5
466 #define CL_BGRA 0x10B6
467 #define CL_ARGB 0x10B7
468 #define CL_INTENSITY 0x10B8
469 #define CL_LUMINANCE 0x10B9
471 #define CL_RGx 0x10BB
472 #define CL_RGBx 0x10BC
473 #define CL_DEPTH 0x10BD
474 #define CL_DEPTH_STENCIL 0x10BE
476 #define CL_SNORM_INT8 0x10D0
477 #define CL_SNORM_INT16 0x10D1
478 #define CL_UNORM_INT8 0x10D2
479 #define CL_UNORM_INT16 0x10D3
480 #define CL_UNORM_SHORT_565 0x10D4
481 #define CL_UNORM_SHORT_555 0x10D5
482 #define CL_UNORM_INT_101010 0x10D6
483 #define CL_SIGNED_INT8 0x10D7
484 #define CL_SIGNED_INT16 0x10D8
485 #define CL_SIGNED_INT32 0x10D9
486 #define CL_UNSIGNED_INT8 0x10DA
487 #define CL_UNSIGNED_INT16 0x10DB
488 #define CL_UNSIGNED_INT32 0x10DC
489 #define CL_HALF_FLOAT 0x10DD
490 #define CL_FLOAT 0x10DE
491 #define CL_UNORM_INT24 0x10DF
493 #define CL_MEM_OBJECT_BUFFER 0x10F0
494 #define CL_MEM_OBJECT_IMAGE2D 0x10F1
495 #define CL_MEM_OBJECT_IMAGE3D 0x10F2
496 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
497 #define CL_MEM_OBJECT_IMAGE1D 0x10F4
498 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
499 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
501 #define CL_MEM_TYPE 0x1100
502 #define CL_MEM_FLAGS 0x1101
503 #define CL_MEM_SIZE 0x1102
504 #define CL_MEM_HOST_PTR 0x1103
505 #define CL_MEM_MAP_COUNT 0x1104
506 #define CL_MEM_REFERENCE_COUNT 0x1105
507 #define CL_MEM_CONTEXT 0x1106
508 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
509 #define CL_MEM_OFFSET 0x1108
511 #define CL_IMAGE_FORMAT 0x1110
512 #define CL_IMAGE_ELEMENT_SIZE 0x1111
513 #define CL_IMAGE_ROW_PITCH 0x1112
514 #define CL_IMAGE_SLICE_PITCH 0x1113
515 #define CL_IMAGE_WIDTH 0x1114
516 #define CL_IMAGE_HEIGHT 0x1115
517 #define CL_IMAGE_DEPTH 0x1116
518 #define CL_IMAGE_ARRAY_SIZE 0x1117
519 #define CL_IMAGE_BUFFER 0x1118
520 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119
521 #define CL_IMAGE_NUM_SAMPLES 0x111A
523 #define CL_ADDRESS_NONE 0x1130
524 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
525 #define CL_ADDRESS_CLAMP 0x1132
526 #define CL_ADDRESS_REPEAT 0x1133
527 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134
529 #define CL_FILTER_NEAREST 0x1140
530 #define CL_FILTER_LINEAR 0x1141
532 #define CL_SAMPLER_REFERENCE_COUNT 0x1150
533 #define CL_SAMPLER_CONTEXT 0x1151
534 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152
535 #define CL_SAMPLER_ADDRESSING_MODE 0x1153
536 #define CL_SAMPLER_FILTER_MODE 0x1154
538 #define CL_MAP_READ (1 << 0)
539 #define CL_MAP_WRITE (1 << 1)
540 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
542 #define CL_PROGRAM_REFERENCE_COUNT 0x1160
543 #define CL_PROGRAM_CONTEXT 0x1161
544 #define CL_PROGRAM_NUM_DEVICES 0x1162
545 #define CL_PROGRAM_DEVICES 0x1163
546 #define CL_PROGRAM_SOURCE 0x1164
547 #define CL_PROGRAM_BINARY_SIZES 0x1165
548 #define CL_PROGRAM_BINARIES 0x1166
549 #define CL_PROGRAM_NUM_KERNELS 0x1167
550 #define CL_PROGRAM_KERNEL_NAMES 0x1168
551 #define CL_PROGRAM_BUILD_STATUS 0x1181
552 #define CL_PROGRAM_BUILD_OPTIONS 0x1182
553 #define CL_PROGRAM_BUILD_LOG 0x1183
554 #define CL_PROGRAM_BINARY_TYPE 0x1184
555 #define CL_PROGRAM_BINARY_TYPE_NONE 0x0
556 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
557 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
558 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
560 #define CL_BUILD_SUCCESS 0
561 #define CL_BUILD_NONE -1
562 #define CL_BUILD_ERROR -2
563 #define CL_BUILD_IN_PROGRESS -3
565 #define CL_KERNEL_FUNCTION_NAME 0x1190
566 #define CL_KERNEL_NUM_ARGS 0x1191
567 #define CL_KERNEL_REFERENCE_COUNT 0x1192
568 #define CL_KERNEL_CONTEXT 0x1193
569 #define CL_KERNEL_PROGRAM 0x1194
570 #define CL_KERNEL_ATTRIBUTES 0x1195
571 #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
572 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
573 #define CL_KERNEL_ARG_TYPE_NAME 0x1198
574 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
575 #define CL_KERNEL_ARG_NAME 0x119A
576 #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
577 #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
578 #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
579 #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
580 #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
581 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
582 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
583 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
584 #define CL_KERNEL_ARG_TYPE_NONE 0
585 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
586 #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
587 #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
588 #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
589 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
590 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
591 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
592 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
593 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
595 #define CL_EVENT_COMMAND_QUEUE 0x11D0
596 #define CL_EVENT_COMMAND_TYPE 0x11D1
597 #define CL_EVENT_REFERENCE_COUNT 0x11D2
598 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
599 #define CL_EVENT_CONTEXT 0x11D4
601 #define CL_COMMAND_NDRANGE_KERNEL 0x11F0
602 #define CL_COMMAND_TASK 0x11F1
603 #define CL_COMMAND_NATIVE_KERNEL 0x11F2
604 #define CL_COMMAND_READ_BUFFER 0x11F3
605 #define CL_COMMAND_WRITE_BUFFER 0x11F4
606 #define CL_COMMAND_COPY_BUFFER 0x11F5
607 #define CL_COMMAND_READ_IMAGE 0x11F6
608 #define CL_COMMAND_WRITE_IMAGE 0x11F7
609 #define CL_COMMAND_COPY_IMAGE 0x11F8
610 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
611 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
612 #define CL_COMMAND_MAP_BUFFER 0x11FB
613 #define CL_COMMAND_MAP_IMAGE 0x11FC
614 #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
615 #define CL_COMMAND_MARKER 0x11FE
616 #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
617 #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
618 #define CL_COMMAND_READ_BUFFER_RECT 0x1201
619 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
620 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203
621 #define CL_COMMAND_USER 0x1204
622 #define CL_COMMAND_BARRIER 0x1205
623 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
624 #define CL_COMMAND_FILL_BUFFER 0x1207
625 #define CL_COMMAND_FILL_IMAGE 0x1208
627 #define CL_COMPLETE 0x0
628 #define CL_RUNNING 0x1
629 #define CL_SUBMITTED 0x2
630 #define CL_QUEUED 0x3
631 #define CL_BUFFER_CREATE_TYPE_REGION 0x1220
633 #define CL_PROFILING_COMMAND_QUEUED 0x1280
634 #define CL_PROFILING_COMMAND_SUBMIT 0x1281
635 #define CL_PROFILING_COMMAND_START 0x1282
636 #define CL_PROFILING_COMMAND_END 0x1283
638 #define CL_CALLBACK CV_STDCALL
640 static volatile bool g_haveOpenCL = false;
641 static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
643 #if defined(__APPLE__)
646 static void* initOpenCLAndLoad(const char* funcname)
648 static bool initialized = false;
649 static void* handle = 0;
654 const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
655 oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
656 "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
657 handle = dlopen(oclpath, RTLD_LAZY);
659 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
661 fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath);
663 fprintf(stderr, "Failed to load OpenCL runtime\n");
669 return funcname && handle ? dlsym(handle, funcname) : 0;
672 #elif defined WIN32 || defined _WIN32
674 #ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
675 #define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
678 #if (_WIN32_WINNT >= 0x0602)
679 #include <synchapi.h>
686 static void* initOpenCLAndLoad(const char* funcname)
688 static bool initialized = false;
689 static HMODULE handle = 0;
695 handle = LoadLibraryA("OpenCL.dll");
697 g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
704 return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
707 #elif defined(__linux)
712 static void* initOpenCLAndLoad(const char* funcname)
714 static bool initialized = false;
715 static void* handle = 0;
720 handle = dlopen("libOpenCL.so", RTLD_LAZY);
722 handle = dlopen("libCL.so", RTLD_LAZY);
724 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
730 return funcname ? (void*)dlsym(handle, funcname) : 0;
735 static void* initOpenCLAndLoad(const char*)
743 #define OCL_FUNC(rettype, funcname, argsdecl, args) \
744 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
745 static rettype funcname argsdecl \
747 static funcname##_t funcname##_p = 0; \
748 if( !funcname##_p ) \
750 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
751 if( !funcname##_p ) \
752 return OPENCV_CL_NOT_IMPLEMENTED; \
754 return funcname##_p args; \
758 #define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
759 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
760 static rettype funcname argsdecl \
762 static funcname##_t funcname##_p = 0; \
763 if( !funcname##_p ) \
765 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
766 if( !funcname##_p ) \
769 *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
773 return funcname##_p args; \
776 OCL_FUNC(cl_int, clGetPlatformIDs,
777 (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
778 (num_entries, platforms, num_platforms))
780 OCL_FUNC(cl_int, clGetPlatformInfo,
781 (cl_platform_id platform, cl_platform_info param_name,
782 size_t param_value_size, void * param_value,
783 size_t * param_value_size_ret),
784 (platform, param_name, param_value_size, param_value, param_value_size_ret))
786 OCL_FUNC(cl_int, clGetDeviceInfo,
787 (cl_device_id device,
788 cl_device_info param_name,
789 size_t param_value_size,
791 size_t * param_value_size_ret),
792 (device, param_name, param_value_size, param_value, param_value_size_ret))
795 OCL_FUNC(cl_int, clGetDeviceIDs,
796 (cl_platform_id platform,
797 cl_device_type device_type,
799 cl_device_id * devices,
800 cl_uint * num_devices),
801 (platform, device_type, num_entries, devices, num_devices))
803 OCL_FUNC_P(cl_context, clCreateContext,
804 (const cl_context_properties * properties,
806 const cl_device_id * devices,
807 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
809 cl_int * errcode_ret),
810 (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
812 OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
815 OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
817 OCL_FUNC_P(cl_context, clCreateContextFromType,
818 (const cl_context_properties * properties,
819 cl_device_type device_type,
820 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
822 cl_int * errcode_ret),
823 (properties, device_type, pfn_notify, user_data, errcode_ret))
825 OCL_FUNC(cl_int, clGetContextInfo,
827 cl_context_info param_name,
828 size_t param_value_size,
830 size_t * param_value_size_ret),
831 (context, param_name, param_value_size,
832 param_value, param_value_size_ret))
834 OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
837 cl_command_queue_properties properties,
838 cl_int * errcode_ret),
839 (context, device, properties, errcode_ret))
841 OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
843 OCL_FUNC_P(cl_mem, clCreateBuffer,
848 cl_int * errcode_ret),
849 (context, flags, size, host_ptr, errcode_ret))
852 OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
854 OCL_FUNC(cl_int, clGetCommandQueueInfo,
855 (cl_command_queue command_queue,
856 cl_command_queue_info param_name,
857 size_t param_value_size,
859 size_t * param_value_size_ret),
860 (command_queue, param_name, param_value_size, param_value, param_value_size_ret))
862 OCL_FUNC_P(cl_mem, clCreateSubBuffer,
865 cl_buffer_create_type buffer_create_type,
866 const void * buffer_create_info,
867 cl_int * errcode_ret),
868 (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
871 OCL_FUNC_P(cl_mem, clCreateImage,
874 const cl_image_format * image_format,
875 const cl_image_desc * image_desc,
877 cl_int * errcode_ret),
878 (context, flags, image_format, image_desc, host_ptr, errcode_ret))
880 OCL_FUNC_P(cl_mem, clCreateImage2D,
883 const cl_image_format * image_format,
886 size_t image_row_pitch,
888 cl_int *errcode_ret),
889 (context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret))
891 OCL_FUNC(cl_int, clGetSupportedImageFormats,
894 cl_mem_object_type image_type,
896 cl_image_format * image_formats,
897 cl_uint * num_image_formats),
898 (context, flags, image_type, num_entries, image_formats, num_image_formats))
901 OCL_FUNC(cl_int, clGetMemObjectInfo,
903 cl_mem_info param_name,
904 size_t param_value_size,
906 size_t * param_value_size_ret),
907 (memobj, param_name, param_value_size, param_value, param_value_size_ret))
909 OCL_FUNC(cl_int, clGetImageInfo,
911 cl_image_info param_name,
912 size_t param_value_size,
914 size_t * param_value_size_ret),
915 (image, param_name, param_value_size, param_value, param_value_size_ret))
917 OCL_FUNC(cl_int, clCreateKernelsInProgram,
921 cl_uint * num_kernels_ret),
922 (program, num_kernels, kernels, num_kernels_ret))
924 OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
926 OCL_FUNC(cl_int, clGetKernelArgInfo,
929 cl_kernel_arg_info param_name,
930 size_t param_value_size,
932 size_t * param_value_size_ret),
933 (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
935 OCL_FUNC(cl_int, clEnqueueReadImage,
936 (cl_command_queue command_queue,
938 cl_bool blocking_read,
939 const size_t * origin[3],
940 const size_t * region[3],
944 cl_uint num_events_in_wait_list,
945 const cl_event * event_wait_list,
947 (command_queue, image, blocking_read, origin, region,
948 row_pitch, slice_pitch,
950 num_events_in_wait_list,
954 OCL_FUNC(cl_int, clEnqueueWriteImage,
955 (cl_command_queue command_queue,
957 cl_bool blocking_write,
958 const size_t * origin[3],
959 const size_t * region[3],
960 size_t input_row_pitch,
961 size_t input_slice_pitch,
963 cl_uint num_events_in_wait_list,
964 const cl_event * event_wait_list,
966 (command_queue, image, blocking_write, origin, region, input_row_pitch,
967 input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
969 OCL_FUNC(cl_int, clEnqueueFillImage,
970 (cl_command_queue command_queue,
972 const void * fill_color,
973 const size_t * origin[3],
974 const size_t * region[3],
975 cl_uint num_events_in_wait_list,
976 const cl_event * event_wait_list,
978 (command_queue, image, fill_color, origin, region,
979 num_events_in_wait_list, event_wait_list, event))
981 OCL_FUNC(cl_int, clEnqueueCopyImage,
982 (cl_command_queue command_queue,
985 const size_t * src_origin[3],
986 const size_t * dst_origin[3],
987 const size_t * region[3],
988 cl_uint num_events_in_wait_list,
989 const cl_event * event_wait_list,
991 (command_queue, src_image, dst_image, src_origin, dst_origin,
992 region, num_events_in_wait_list, event_wait_list, event))
994 OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
995 (cl_command_queue command_queue,
998 const size_t * src_origin[3],
999 const size_t * region[3],
1001 cl_uint num_events_in_wait_list,
1002 const cl_event * event_wait_list,
1004 (command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
1005 num_events_in_wait_list, event_wait_list, event))
1008 OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
1009 (cl_command_queue command_queue,
1013 const size_t dst_origin[3],
1014 const size_t region[3],
1015 cl_uint num_events_in_wait_list,
1016 const cl_event * event_wait_list,
1018 (command_queue, src_buffer, dst_image, src_offset, dst_origin,
1019 region, num_events_in_wait_list, event_wait_list, event))
1021 OCL_FUNC(cl_int, clFlush,
1022 (cl_command_queue command_queue),
1026 OCL_FUNC_P(void*, clEnqueueMapImage,
1027 (cl_command_queue command_queue,
1029 cl_bool blocking_map,
1030 cl_map_flags map_flags,
1031 const size_t * origin[3],
1032 const size_t * region[3],
1033 size_t * image_row_pitch,
1034 size_t * image_slice_pitch,
1035 cl_uint num_events_in_wait_list,
1036 const cl_event * event_wait_list,
1038 cl_int * errcode_ret),
1039 (command_queue, image, blocking_map, map_flags, origin, region,
1040 image_row_pitch, image_slice_pitch, num_events_in_wait_list,
1041 event_wait_list, event, errcode_ret))
1045 OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
1047 OCL_FUNC(cl_int, clGetKernelInfo,
1049 cl_kernel_info param_name,
1050 size_t param_value_size,
1052 size_t * param_value_size_ret),
1053 (kernel, param_name, param_value_size, param_value, param_value_size_ret))
1055 OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
1059 OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
1062 OCL_FUNC_P(cl_program, clCreateProgramWithSource,
1063 (cl_context context,
1065 const char ** strings,
1066 const size_t * lengths,
1067 cl_int * errcode_ret),
1068 (context, count, strings, lengths, errcode_ret))
1070 OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
1071 (cl_context context,
1072 cl_uint num_devices,
1073 const cl_device_id * device_list,
1074 const size_t * lengths,
1075 const unsigned char ** binaries,
1076 cl_int * binary_status,
1077 cl_int * errcode_ret),
1078 (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))
1080 OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))
1082 OCL_FUNC(cl_int, clBuildProgram,
1083 (cl_program program,
1084 cl_uint num_devices,
1085 const cl_device_id * device_list,
1086 const char * options,
1087 void (CL_CALLBACK * pfn_notify)(cl_program, void *),
1089 (program, num_devices, device_list, options, pfn_notify, user_data))
1091 OCL_FUNC(cl_int, clGetProgramInfo,
1092 (cl_program program,
1093 cl_program_info param_name,
1094 size_t param_value_size,
1096 size_t * param_value_size_ret),
1097 (program, param_name, param_value_size, param_value, param_value_size_ret))
1099 OCL_FUNC(cl_int, clGetProgramBuildInfo,
1100 (cl_program program,
1101 cl_device_id device,
1102 cl_program_build_info param_name,
1103 size_t param_value_size,
1105 size_t * param_value_size_ret),
1106 (program, device, param_name, param_value_size, param_value, param_value_size_ret))
1108 OCL_FUNC_P(cl_kernel, clCreateKernel,
1109 (cl_program program,
1110 const char * kernel_name,
1111 cl_int * errcode_ret),
1112 (program, kernel_name, errcode_ret))
1114 OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))
1116 OCL_FUNC(cl_int, clSetKernelArg,
1120 const void * arg_value),
1121 (kernel, arg_index, arg_size, arg_value))
1123 OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
1125 cl_device_id device,
1126 cl_kernel_work_group_info param_name,
1127 size_t param_value_size,
1129 size_t * param_value_size_ret),
1130 (kernel, device, param_name, param_value_size, param_value, param_value_size_ret))
1132 OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
1134 OCL_FUNC(cl_int, clEnqueueReadBuffer,
1135 (cl_command_queue command_queue,
1137 cl_bool blocking_read,
1141 cl_uint num_events_in_wait_list,
1142 const cl_event * event_wait_list,
1144 (command_queue, buffer, blocking_read, offset, size, ptr,
1145 num_events_in_wait_list, event_wait_list, event))
1147 OCL_FUNC(cl_int, clEnqueueReadBufferRect,
1148 (cl_command_queue command_queue,
1150 cl_bool blocking_read,
1151 const size_t * buffer_offset,
1152 const size_t * host_offset,
1153 const size_t * region,
1154 size_t buffer_row_pitch,
1155 size_t buffer_slice_pitch,
1156 size_t host_row_pitch,
1157 size_t host_slice_pitch,
1159 cl_uint num_events_in_wait_list,
1160 const cl_event * event_wait_list,
1162 (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch,
1163 buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
1164 event_wait_list, event))
1166 OCL_FUNC(cl_int, clEnqueueWriteBuffer,
1167 (cl_command_queue command_queue,
1169 cl_bool blocking_write,
1173 cl_uint num_events_in_wait_list,
1174 const cl_event * event_wait_list,
1176 (command_queue, buffer, blocking_write, offset, size, ptr,
1177 num_events_in_wait_list, event_wait_list, event))
1179 OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
1180 (cl_command_queue command_queue,
1182 cl_bool blocking_write,
1183 const size_t * buffer_offset,
1184 const size_t * host_offset,
1185 const size_t * region,
1186 size_t buffer_row_pitch,
1187 size_t buffer_slice_pitch,
1188 size_t host_row_pitch,
1189 size_t host_slice_pitch,
1191 cl_uint num_events_in_wait_list,
1192 const cl_event * event_wait_list,
1194 (command_queue, buffer, blocking_write, buffer_offset, host_offset,
1195 region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch,
1196 host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
1198 /*OCL_FUNC(cl_int, clEnqueueFillBuffer,
1199 (cl_command_queue command_queue,
1201 const void * pattern,
1202 size_t pattern_size,
1205 cl_uint num_events_in_wait_list,
1206 const cl_event * event_wait_list,
1208 (command_queue, buffer, pattern, pattern_size, offset, size,
1209 num_events_in_wait_list, event_wait_list, event))*/
1211 OCL_FUNC(cl_int, clEnqueueCopyBuffer,
1212 (cl_command_queue command_queue,
1218 cl_uint num_events_in_wait_list,
1219 const cl_event * event_wait_list,
1221 (command_queue, src_buffer, dst_buffer, src_offset, dst_offset,
1222 size, num_events_in_wait_list, event_wait_list, event))
1224 OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
1225 (cl_command_queue command_queue,
1228 const size_t * src_origin,
1229 const size_t * dst_origin,
1230 const size_t * region,
1231 size_t src_row_pitch,
1232 size_t src_slice_pitch,
1233 size_t dst_row_pitch,
1234 size_t dst_slice_pitch,
1235 cl_uint num_events_in_wait_list,
1236 const cl_event * event_wait_list,
1238 (command_queue, src_buffer, dst_buffer, src_origin, dst_origin,
1239 region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
1240 num_events_in_wait_list, event_wait_list, event))
1242 OCL_FUNC_P(void*, clEnqueueMapBuffer,
1243 (cl_command_queue command_queue,
1245 cl_bool blocking_map,
1246 cl_map_flags map_flags,
1249 cl_uint num_events_in_wait_list,
1250 const cl_event * event_wait_list,
1252 cl_int * errcode_ret),
1253 (command_queue, buffer, blocking_map, map_flags, offset, size,
1254 num_events_in_wait_list, event_wait_list, event, errcode_ret))
1256 OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
1257 (cl_command_queue command_queue,
1260 cl_uint num_events_in_wait_list,
1261 const cl_event * event_wait_list,
1263 (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))
1265 OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
1266 (cl_command_queue command_queue,
1269 const size_t * global_work_offset,
1270 const size_t * global_work_size,
1271 const size_t * local_work_size,
1272 cl_uint num_events_in_wait_list,
1273 const cl_event * event_wait_list,
1275 (command_queue, kernel, work_dim, global_work_offset, global_work_size,
1276 local_work_size, num_events_in_wait_list, event_wait_list, event))
1278 OCL_FUNC(cl_int, clEnqueueTask,
1279 (cl_command_queue command_queue,
1281 cl_uint num_events_in_wait_list,
1282 const cl_event * event_wait_list,
1284 (command_queue, kernel, num_events_in_wait_list, event_wait_list, event))
1286 OCL_FUNC(cl_int, clSetEventCallback,
1288 cl_int command_exec_callback_type ,
1289 void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data),
1291 (event, command_exec_callback_type, pfn_event_notify, user_data))
1293 OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
1299 #ifndef CL_VERSION_1_2
1300 #define CL_VERSION_1_2
1306 #define CV_OclDbgAssert CV_DbgAssert
1308 #define CV_OclDbgAssert(expr) (void)(expr)
1311 namespace cv { namespace ocl {
1315 UMat2D(const UMat& m)
1317 offset = (int)m.offset;
1330 UMat3D(const UMat& m)
1332 offset = (int)m.offset;
1333 step = (int)m.step.p[1];
1334 slicestep = (int)m.step.p[0];
1335 slices = (int)m.size.p[0];
1347 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
1348 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
1350 static uint64 table[256];
1351 static bool initialized = false;
1355 for( int i = 0; i < 256; i++ )
1358 for( int j = 0; j < 8; j++ )
1359 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
1366 for( size_t idx = 0; idx < size; idx++ )
1367 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
1374 typedef uint64 part;
1375 HashKey(part _a, part _b) : a(_a), b(_b) {}
1379 inline bool operator == (const HashKey& h1, const HashKey& h2)
1381 return h1.a == h2.a && h1.b == h2.b;
1384 inline bool operator < (const HashKey& h1, const HashKey& h2)
1386 return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
1393 static bool g_isOpenCLInitialized = false;
1394 static bool g_isOpenCLAvailable = false;
1396 if (!g_isOpenCLInitialized)
1401 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1405 g_isOpenCLAvailable = false;
1407 g_isOpenCLInitialized = true;
1409 return g_isOpenCLAvailable;
1417 CoreTLSData* data = coreTlsData.get();
1418 if( data->useOpenCL < 0 )
1419 data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() != NULL;
1420 return data->useOpenCL > 0;
1423 void setUseOpenCL(bool flag)
1427 CoreTLSData* data = coreTlsData.get();
1428 data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0;
1432 #ifdef HAVE_CLAMDBLAS
1437 static AmdBlasHelper & getInstance()
1439 static AmdBlasHelper amdBlas;
1443 bool isAvailable() const
1445 return g_isAmdBlasAvailable;
1452 clAmdBlasTeardown();
1460 if (!g_isAmdBlasInitialized)
1464 if (!g_isAmdBlasInitialized && haveOpenCL())
1468 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
1472 g_isAmdBlasAvailable = false;
1476 g_isAmdBlasAvailable = false;
1478 g_isAmdBlasInitialized = true;
1484 static bool g_isAmdBlasInitialized;
1485 static bool g_isAmdBlasAvailable;
1488 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1489 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1490 Mutex AmdBlasHelper::m;
1494 return AmdBlasHelper::getInstance().isAvailable();
1506 #ifdef HAVE_CLAMDFFT
1511 static AmdFftHelper & getInstance()
1513 static AmdFftHelper amdFft;
1517 bool isAvailable() const
1519 return g_isAmdFftAvailable;
1526 // clAmdFftTeardown();
1534 if (!g_isAmdFftInitialized)
1538 if (!g_isAmdFftInitialized && haveOpenCL())
1542 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1543 g_isAmdFftAvailable = true;
1545 catch (const Exception &)
1547 g_isAmdFftAvailable = false;
1551 g_isAmdFftAvailable = false;
1553 g_isAmdFftInitialized = true;
1558 static clAmdFftSetupData setupData;
1560 static bool g_isAmdFftInitialized;
1561 static bool g_isAmdFftAvailable;
1564 clAmdFftSetupData AmdFftHelper::setupData;
1565 bool AmdFftHelper::g_isAmdFftAvailable = false;
1566 bool AmdFftHelper::g_isAmdFftInitialized = false;
1567 Mutex AmdFftHelper::m;
1571 return AmdFftHelper::getInstance().isAvailable();
1585 Queue::getDefault().finish();
1588 #define IMPLEMENT_REFCOUNTABLE() \
1589 void addref() { CV_XADD(&refcount, 1); } \
1590 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
1593 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1595 struct Platform::Impl
1601 initialized = false;
1610 //cl_uint num_entries
1612 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1618 CV_OclDbgAssert(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len) == CL_SUCCESS);
1620 vendor = String(buf);
1627 IMPLEMENT_REFCOUNTABLE();
1629 cl_platform_id handle;
1634 Platform::Platform()
1639 Platform::~Platform()
1645 Platform::Platform(const Platform& pl)
1652 Platform& Platform::operator = (const Platform& pl)
1654 Impl* newp = (Impl*)pl.p;
1663 void* Platform::ptr() const
1665 return p ? p->handle : 0;
1668 Platform& Platform::getDefault()
1679 /////////////////////////////////////// Device ////////////////////////////////////////////
1681 // deviceVersion has format
1682 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1684 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1685 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1686 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1689 if (10 >= deviceVersion.length())
1691 const char *pstr = deviceVersion.c_str();
1692 if (0 != strncmp(pstr, "OpenCL ", 7))
1694 size_t ppos = deviceVersion.find('.', 7);
1695 if (String::npos == ppos)
1697 String temp = deviceVersion.substr(7, ppos - 7);
1698 major = atoi(temp.c_str());
1699 temp = deviceVersion.substr(ppos + 1);
1700 minor = atoi(temp.c_str());
1707 handle = (cl_device_id)d;
1710 name_ = getStrProp(CL_DEVICE_NAME);
1711 version_ = getStrProp(CL_DEVICE_VERSION);
1712 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1713 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1714 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1715 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1716 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1717 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1719 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1720 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1722 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1723 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1724 vendorName_ == "AMD")
1725 vendorID_ = VENDOR_AMD;
1726 else if (vendorName_ == "Intel(R) Corporation")
1727 vendorID_ = VENDOR_INTEL;
1728 else if (vendorName_ == "NVIDIA Corporation")
1729 vendorID_ = VENDOR_NVIDIA;
1731 vendorID_ = UNKNOWN_VENDOR;
1734 template<typename _TpCL, typename _TpOut>
1735 _TpOut getProp(cl_device_info prop) const
1740 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1741 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1744 bool getBoolProp(cl_device_info prop) const
1746 cl_bool temp = CL_FALSE;
1749 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1750 sz == sizeof(temp) ? temp != 0 : false;
1753 String getStrProp(cl_device_info prop) const
1757 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1758 sz < sizeof(buf) ? String(buf) : String();
1761 IMPLEMENT_REFCOUNTABLE();
1762 cl_device_id handle;
1766 int doubleFPConfig_;
1767 bool hostUnifiedMemory_;
1768 int maxComputeUnits_;
1769 size_t maxWorkGroupSize_;
1771 int deviceVersionMajor_;
1772 int deviceVersionMinor_;
1773 String driverVersion_;
1784 Device::Device(void* d)
1790 Device::Device(const Device& d)
1797 Device& Device::operator = (const Device& d)
1799 Impl* newp = (Impl*)d.p;
1814 void Device::set(void* d)
1821 void* Device::ptr() const
1823 return p ? p->handle : 0;
1826 String Device::name() const
1827 { return p ? p->name_ : String(); }
1829 String Device::extensions() const
1830 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1832 String Device::version() const
1833 { return p ? p->version_ : String(); }
1835 String Device::vendorName() const
1836 { return p ? p->vendorName_ : String(); }
1838 int Device::vendorID() const
1839 { return p ? p->vendorID_ : 0; }
1841 String Device::OpenCL_C_Version() const
1842 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1844 String Device::OpenCLVersion() const
1845 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1847 int Device::deviceVersionMajor() const
1848 { return p ? p->deviceVersionMajor_ : 0; }
1850 int Device::deviceVersionMinor() const
1851 { return p ? p->deviceVersionMinor_ : 0; }
1853 String Device::driverVersion() const
1854 { return p ? p->driverVersion_ : String(); }
1856 int Device::type() const
1857 { return p ? p->type_ : 0; }
1859 int Device::addressBits() const
1860 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
1862 bool Device::available() const
1863 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1865 bool Device::compilerAvailable() const
1866 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1868 bool Device::linkerAvailable() const
1869 #ifdef CL_VERSION_1_2
1870 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1872 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1875 int Device::doubleFPConfig() const
1876 { return p ? p->doubleFPConfig_ : 0; }
1878 int Device::singleFPConfig() const
1879 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1881 int Device::halfFPConfig() const
1882 #ifdef CL_VERSION_1_2
1883 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1885 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1888 bool Device::endianLittle() const
1889 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1891 bool Device::errorCorrectionSupport() const
1892 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1894 int Device::executionCapabilities() const
1895 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1897 size_t Device::globalMemCacheSize() const
1898 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1900 int Device::globalMemCacheType() const
1901 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1903 int Device::globalMemCacheLineSize() const
1904 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1906 size_t Device::globalMemSize() const
1907 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1909 size_t Device::localMemSize() const
1910 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1912 int Device::localMemType() const
1913 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1915 bool Device::hostUnifiedMemory() const
1916 { return p ? p->hostUnifiedMemory_ : false; }
1918 bool Device::imageSupport() const
1919 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1921 bool Device::imageFromBufferSupport() const
1926 size_t pos = p->getStrProp(CL_DEVICE_EXTENSIONS).find("cl_khr_image2d_from_buffer");
1927 if (pos != String::npos)
1935 uint Device::imagePitchAlignment() const
1937 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1938 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1944 uint Device::imageBaseAddressAlignment() const
1946 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1947 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1953 size_t Device::image2DMaxWidth() const
1954 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1956 size_t Device::image2DMaxHeight() const
1957 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1959 size_t Device::image3DMaxWidth() const
1960 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1962 size_t Device::image3DMaxHeight() const
1963 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1965 size_t Device::image3DMaxDepth() const
1966 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1968 size_t Device::imageMaxBufferSize() const
1969 #ifdef CL_VERSION_1_2
1970 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1972 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1975 size_t Device::imageMaxArraySize() const
1976 #ifdef CL_VERSION_1_2
1977 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1979 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1982 int Device::maxClockFrequency() const
1983 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1985 int Device::maxComputeUnits() const
1986 { return p ? p->maxComputeUnits_ : 0; }
1988 int Device::maxConstantArgs() const
1989 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1991 size_t Device::maxConstantBufferSize() const
1992 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1994 size_t Device::maxMemAllocSize() const
1995 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1997 size_t Device::maxParameterSize() const
1998 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
2000 int Device::maxReadImageArgs() const
2001 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
2003 int Device::maxWriteImageArgs() const
2004 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
2006 int Device::maxSamplers() const
2007 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
2009 size_t Device::maxWorkGroupSize() const
2010 { return p ? p->maxWorkGroupSize_ : 0; }
2012 int Device::maxWorkItemDims() const
2013 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
2015 void Device::maxWorkItemSizes(size_t* sizes) const
2019 const int MAX_DIMS = 32;
2021 CV_OclDbgAssert(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
2022 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz) == CL_SUCCESS);
2026 int Device::memBaseAddrAlign() const
2027 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
2029 int Device::nativeVectorWidthChar() const
2030 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
2032 int Device::nativeVectorWidthShort() const
2033 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
2035 int Device::nativeVectorWidthInt() const
2036 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
2038 int Device::nativeVectorWidthLong() const
2039 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
2041 int Device::nativeVectorWidthFloat() const
2042 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
2044 int Device::nativeVectorWidthDouble() const
2045 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
2047 int Device::nativeVectorWidthHalf() const
2048 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
2050 int Device::preferredVectorWidthChar() const
2051 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
2053 int Device::preferredVectorWidthShort() const
2054 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
2056 int Device::preferredVectorWidthInt() const
2057 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
2059 int Device::preferredVectorWidthLong() const
2060 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
2062 int Device::preferredVectorWidthFloat() const
2063 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
2065 int Device::preferredVectorWidthDouble() const
2066 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
2068 int Device::preferredVectorWidthHalf() const
2069 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
2071 size_t Device::printfBufferSize() const
2072 #ifdef CL_VERSION_1_2
2073 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
2075 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2079 size_t Device::profilingTimerResolution() const
2080 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
2082 const Device& Device::getDefault()
2084 const Context& ctx = Context::getDefault();
2085 int idx = coreTlsData.get()->device;
2086 return ctx.device(idx);
2089 ////////////////////////////////////// Context ///////////////////////////////////////////////////
2091 template <typename Functor, typename ObjectType>
2092 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2095 cl_int err = f(obj, name, 0, NULL, &required);
2096 if (err != CL_SUCCESS)
2102 AutoBuffer<char> buf(required + 1);
2103 char* ptr = (char*)buf; // cleanup is not needed
2104 err = f(obj, name, required, ptr, NULL);
2105 if (err != CL_SUCCESS)
2113 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
2118 std::istringstream ss(s);
2122 std::getline(ss, item, delim);
2123 elems.push_back(item);
2127 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2129 // Sample: AMD:GPU:Tahiti
2130 // Sample: :GPU|CPU: = '' = ':' = '::'
2131 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2132 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2134 std::vector<std::string> parts;
2135 split(configurationStr, ':', parts);
2136 if (parts.size() > 3)
2138 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
2141 if (parts.size() > 2)
2142 deviceNameOrID = parts[2];
2143 if (parts.size() > 1)
2145 split(parts[1], '|', deviceTypes);
2147 if (parts.size() > 0)
2149 platform = parts[0];
2155 static cl_device_id selectOpenCLDevice()
2160 static cl_device_id selectOpenCLDevice()
2162 std::string platform, deviceName;
2163 std::vector<std::string> deviceTypes;
2165 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
2166 if (configuration && !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
2171 if (deviceName.length() == 1)
2172 // We limit ID range to 0..9, because we want to write:
2173 // - '2500' to mean i5-2500
2174 // - '8350' to mean AMD FX-8350
2175 // - '650' to mean GeForce 650
2176 // To extend ID range change condition to '> 0'
2179 for (size_t i = 0; i < deviceName.length(); i++)
2181 if (!isdigit(deviceName[i]))
2189 deviceID = atoi(deviceName.c_str());
2195 std::vector<cl_platform_id> platforms;
2197 cl_uint numPlatforms = 0;
2198 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
2200 if (numPlatforms == 0)
2202 platforms.resize((size_t)numPlatforms);
2203 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
2204 platforms.resize(numPlatforms);
2207 int selectedPlatform = -1;
2208 if (platform.length() > 0)
2210 for (size_t i = 0; i < platforms.size(); i++)
2213 CV_OclDbgAssert(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name) == CL_SUCCESS);
2214 if (name.find(platform) != std::string::npos)
2216 selectedPlatform = (int)i;
2220 if (selectedPlatform == -1)
2222 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
2226 if (deviceTypes.size() == 0)
2230 deviceTypes.push_back("GPU");
2231 deviceTypes.push_back("CPU");
2234 deviceTypes.push_back("ALL");
2236 for (size_t t = 0; t < deviceTypes.size(); t++)
2239 std::string tempStrDeviceType = deviceTypes[t];
2240 std::transform( tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), tolower );
2242 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2243 deviceType = Device::TYPE_GPU;
2244 else if (tempStrDeviceType == "cpu")
2245 deviceType = Device::TYPE_CPU;
2246 else if (tempStrDeviceType == "accelerator")
2247 deviceType = Device::TYPE_ACCELERATOR;
2248 else if (tempStrDeviceType == "all")
2249 deviceType = Device::TYPE_ALL;
2252 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
2256 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2257 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2258 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2262 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2263 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2266 size_t base = devices.size();
2267 devices.resize(base + count);
2268 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2269 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2272 for (size_t i = (isID ? deviceID : 0);
2273 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2277 CV_OclDbgAssert(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name) == CL_SUCCESS);
2278 cl_bool useGPU = true;
2279 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2281 cl_bool isIGPU = CL_FALSE;
2282 clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL);
2283 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2285 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2287 // TODO check for OpenCL 1.1
2294 std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
2295 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2296 << " Device types: ";
2297 for (size_t t = 0; t < deviceTypes.size(); t++)
2298 std::cerr << deviceTypes[t] << " ";
2300 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
2301 CV_Error(CL_INVALID_DEVICE, "Requested OpenCL device is not found");
2306 struct Context::Impl
2316 CV_Assert(handle == NULL);
2318 cl_device_id d = selectOpenCLDevice();
2323 cl_platform_id pl = NULL;
2324 CV_OclDbgAssert(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL) == CL_SUCCESS);
2326 cl_context_properties prop[] =
2328 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2332 // !!! in the current implementation force the number of devices to 1 !!!
2336 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2338 bool ok = handle != 0 && status == CL_SUCCESS;
2354 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
2355 cl_context_properties prop[] =
2357 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2361 cl_uint i, nd0 = 0, nd = 0;
2362 int dtype = dtype0 & 15;
2363 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, 0, 0, &nd0 ) == CL_SUCCESS);
2365 AutoBuffer<void*> dlistbuf(nd0*2+1);
2366 cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
2367 cl_device_id* dlist_new = dlist + nd0;
2368 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
2371 for(i = 0; i < nd0; i++)
2374 if( !d.available() || !d.compilerAvailable() )
2376 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
2378 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
2380 String name = d.name();
2381 if( nd != 0 && name != name0 )
2384 dlist_new[nd++] = dlist[i];
2390 // !!! in the current implementation force the number of devices to 1 !!!
2393 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
2394 bool ok = handle != 0 && retval == CL_SUCCESS;
2398 for( i = 0; i < nd; i++ )
2399 devices[i].set(dlist_new[i]);
2407 clReleaseContext(handle);
2413 Program getProg(const ProgramSource& src,
2414 const String& buildflags, String& errmsg)
2416 String prefix = Program::getPrefix(buildflags);
2417 HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
2418 phash_t::iterator it = phash.find(k);
2419 if( it != phash.end() )
2421 //String filename = format("%08x%08x_%08x%08x.clb2",
2422 Program prog(src, buildflags, errmsg);
2424 phash.insert(std::pair<HashKey,Program>(k, prog));
2428 IMPLEMENT_REFCOUNTABLE();
2431 std::vector<Device> devices;
2433 typedef ProgramSource::hash_t hash_t;
2437 HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
2438 bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
2439 bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
2440 bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
2443 typedef std::map<HashKey, Program> phash_t;
2453 Context::Context(int dtype)
2459 bool Context::create()
2474 bool Context::create(int dtype0)
2480 p = new Impl(dtype0);
2498 Context::Context(const Context& c)
2505 Context& Context::operator = (const Context& c)
2507 Impl* newp = (Impl*)c.p;
2516 void* Context::ptr() const
2518 return p == NULL ? NULL : p->handle;
2521 size_t Context::ndevices() const
2523 return p ? p->devices.size() : 0;
2526 const Device& Context::device(size_t idx) const
2528 static Device dummy;
2529 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2532 Context& Context::getDefault(bool initialize)
2534 static Context* ctx = new Context();
2535 if(!ctx->p && haveOpenCL())
2538 ctx->p = new Impl();
2541 // do not create new Context right away.
2542 // First, try to retrieve existing context of the same type.
2543 // In its turn, Platform::getContext() may call Context::create()
2544 // if there is no such context.
2545 if (ctx->p->handle == NULL)
2546 ctx->p->setDefault();
2553 Program Context::getProg(const ProgramSource& prog,
2554 const String& buildopts, String& errmsg)
2556 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2559 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
2561 cl_context context = (cl_context)_context;
2562 cl_device_id device = (cl_device_id)_device;
2564 // cleanup old context
2565 Context::Impl * impl = ctx.p;
2568 CV_OclDbgAssert(clReleaseContext(impl->handle) == CL_SUCCESS);
2570 impl->devices.clear();
2572 impl->handle = context;
2573 impl->devices.resize(1);
2574 impl->devices[0].set(device);
2576 Platform& p = Platform::getDefault();
2577 Platform::Impl* pImpl = p.p;
2578 pImpl->handle = (cl_platform_id)platform;
2581 /////////////////////////////////////////// Queue /////////////////////////////////////////////
2585 Impl(const Context& c, const Device& d)
2588 const Context* pc = &c;
2589 cl_context ch = (cl_context)pc->ptr();
2592 pc = &Context::getDefault();
2593 ch = (cl_context)pc->ptr();
2595 cl_device_id dh = (cl_device_id)d.ptr();
2597 dh = (cl_device_id)pc->device(0).ptr();
2599 handle = clCreateCommandQueue(ch, dh, 0, &retval);
2600 CV_OclDbgAssert(retval == CL_SUCCESS);
2606 if (!cv::__termination)
2612 clReleaseCommandQueue(handle);
2618 IMPLEMENT_REFCOUNTABLE();
2620 cl_command_queue handle;
2628 Queue::Queue(const Context& c, const Device& d)
2634 Queue::Queue(const Queue& q)
2641 Queue& Queue::operator = (const Queue& q)
2643 Impl* newp = (Impl*)q.p;
2658 bool Queue::create(const Context& c, const Device& d)
2663 return p->handle != 0;
2666 void Queue::finish()
2670 CV_OclDbgAssert(clFinish(p->handle) == CL_SUCCESS);
2674 void* Queue::ptr() const
2676 return p ? p->handle : 0;
2679 Queue& Queue::getDefault()
2681 Queue& q = coreTlsData.get()->oclQueue;
2682 if( !q.p && haveOpenCL() )
2683 q.create(Context::getDefault());
2687 static cl_command_queue getQueue(const Queue& q)
2689 cl_command_queue qq = (cl_command_queue)q.ptr();
2691 qq = (cl_command_queue)Queue::getDefault().ptr();
2695 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
2697 KernelArg::KernelArg()
2698 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
2702 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
2703 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
2707 KernelArg KernelArg::Constant(const Mat& m)
2709 CV_Assert(m.isContinuous());
2710 return KernelArg(CONSTANT, 0, 0, 0, m.data, m.total()*m.elemSize());
2713 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
2717 Impl(const char* kname, const Program& prog) :
2718 refcount(1), e(0), nu(0)
2720 cl_program ph = (cl_program)prog.ptr();
2723 clCreateKernel(ph, kname, &retval) : 0;
2724 CV_OclDbgAssert(retval == CL_SUCCESS);
2725 for( int i = 0; i < MAX_ARRS; i++ )
2727 haveTempDstUMats = false;
2732 for( int i = 0; i < MAX_ARRS; i++ )
2735 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
2736 u[i]->currAllocator->deallocate(u[i]);
2740 haveTempDstUMats = false;
2743 void addUMat(const UMat& m, bool dst)
2745 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
2747 CV_XADD(&m.u->urefcount, 1);
2749 if(dst && m.u->tempUMat())
2750 haveTempDstUMats = true;
2753 void addImage(const Image2D& image)
2755 images.push_back(image);
2762 if(e) { clReleaseEvent(e); e = 0; }
2769 clReleaseKernel(handle);
2772 IMPLEMENT_REFCOUNTABLE();
2776 enum { MAX_ARRS = 16 };
2777 UMatData* u[MAX_ARRS];
2779 std::list<Image2D> images;
2780 bool haveTempDstUMats;
2787 static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
2789 ((cv::ocl::Kernel::Impl*)p)->finit();
2794 namespace cv { namespace ocl {
2801 Kernel::Kernel(const char* kname, const Program& prog)
2804 create(kname, prog);
2807 Kernel::Kernel(const char* kname, const ProgramSource& src,
2808 const String& buildopts, String* errmsg)
2811 create(kname, src, buildopts, errmsg);
2814 Kernel::Kernel(const Kernel& k)
2821 Kernel& Kernel::operator = (const Kernel& k)
2823 Impl* newp = (Impl*)k.p;
2838 bool Kernel::create(const char* kname, const Program& prog)
2842 p = new Impl(kname, prog);
2851 bool Kernel::create(const char* kname, const ProgramSource& src,
2852 const String& buildopts, String* errmsg)
2860 if( !errmsg ) errmsg = &tempmsg;
2861 const Program& prog = Context::getDefault().getProg(src, buildopts, *errmsg);
2862 return create(kname, prog);
2865 void* Kernel::ptr() const
2867 return p ? p->handle : 0;
2870 bool Kernel::empty() const
2875 int Kernel::set(int i, const void* value, size_t sz)
2877 if (!p || !p->handle)
2884 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
2885 CV_OclDbgAssert(retval == CL_SUCCESS);
2886 if (retval != CL_SUCCESS)
2891 int Kernel::set(int i, const Image2D& image2D)
2893 p->addImage(image2D);
2894 cl_mem h = (cl_mem)image2D.ptr();
2895 return set(i, &h, sizeof(h));
2898 int Kernel::set(int i, const UMat& m)
2900 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
2903 int Kernel::set(int i, const KernelArg& arg)
2905 if( !p || !p->handle )
2913 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
2914 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
2915 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
2916 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
2926 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h) == CL_SUCCESS);
2927 else if( arg.m->dims <= 2 )
2930 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
2931 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS);
2932 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS);
2935 if( !(arg.flags & KernelArg::NO_SIZE) )
2937 int cols = u2d.cols*arg.wscale/arg.iwscale;
2938 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows) == CL_SUCCESS);
2939 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols) == CL_SUCCESS);
2946 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
2947 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS);
2948 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS);
2949 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS);
2951 if( !(arg.flags & KernelArg::NO_SIZE) )
2953 int cols = u3d.cols*arg.wscale/arg.iwscale;
2954 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows) == CL_SUCCESS);
2955 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows) == CL_SUCCESS);
2956 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols) == CL_SUCCESS);
2960 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
2963 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj) == CL_SUCCESS);
2968 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
2969 bool sync, const Queue& q)
2971 if(!p || !p->handle || p->e != 0)
2974 cl_command_queue qq = getQueue(q);
2975 size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1};
2977 CV_Assert(_globalsize != 0);
2978 for (int i = 0; i < dims; i++)
2980 size_t val = _localsize ? _localsize[i] :
2981 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
2982 CV_Assert( val > 0 );
2983 total *= _globalsize[i];
2984 globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
2988 if( p->haveTempDstUMats )
2990 cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
2991 offset, globalsize, _localsize, 0, 0,
2993 if( sync || retval != CL_SUCCESS )
2995 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3001 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3003 return retval == CL_SUCCESS;
3006 bool Kernel::runTask(bool sync, const Queue& q)
3008 if(!p || !p->handle || p->e != 0)
3011 cl_command_queue qq = getQueue(q);
3012 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
3013 if( sync || retval != CL_SUCCESS )
3015 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3021 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3023 return retval == CL_SUCCESS;
3027 size_t Kernel::workGroupSize() const
3029 if(!p || !p->handle)
3031 size_t val = 0, retsz = 0;
3032 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3033 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
3034 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3037 size_t Kernel::preferedWorkGroupSizeMultiple() const
3039 if(!p || !p->handle)
3041 size_t val = 0, retsz = 0;
3042 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3043 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
3044 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3047 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3049 if(!p || !p->handle || !wsz)
3052 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3053 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
3054 sizeof(wsz[0]*3), wsz, &retsz) == CL_SUCCESS;
3057 size_t Kernel::localMemSize() const
3059 if(!p || !p->handle)
3063 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3064 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
3065 sizeof(val), &val, &retsz) == CL_SUCCESS ? (size_t)val : 0;
3068 /////////////////////////////////////////// Program /////////////////////////////////////////////
3070 struct Program::Impl
3072 Impl(const ProgramSource& _src,
3073 const String& _buildflags, String& errmsg)
3076 const Context& ctx = Context::getDefault();
3078 buildflags = _buildflags;
3079 const String& srcstr = src.source();
3080 const char* srcptr = srcstr.c_str();
3081 size_t srclen = srcstr.size();
3084 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
3085 if( handle && retval == CL_SUCCESS )
3087 int i, n = (int)ctx.ndevices();
3088 AutoBuffer<void*> deviceListBuf(n+1);
3089 void** deviceList = deviceListBuf;
3090 for( i = 0; i < n; i++ )
3091 deviceList[i] = ctx.device(i).ptr();
3093 Device device = Device::getDefault();
3095 buildflags += " -D AMD_DEVICE";
3096 else if (device.isIntel())
3097 buildflags += " -D INTEL_DEVICE";
3099 retval = clBuildProgram(handle, n,
3100 (const cl_device_id*)deviceList,
3101 buildflags.c_str(), 0, 0);
3102 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
3103 if( retval != CL_SUCCESS )
3107 cl_int buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3108 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
3109 if (buildInfo_retval == CL_SUCCESS && retsz > 1)
3111 AutoBuffer<char> bufbuf(retsz + 16);
3113 buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3114 CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
3115 if (buildInfo_retval == CL_SUCCESS)
3117 // TODO It is useful to see kernel name & program file name also
3118 errmsg = String(buf);
3119 printf("OpenCL program build log: %s\n%s\n", buildflags.c_str(), errmsg.c_str());
3123 if (retval != CL_SUCCESS && handle)
3125 clReleaseProgram(handle);
3132 Impl(const String& _buf, const String& _buildflags)
3136 buildflags = _buildflags;
3139 String prefix0 = Program::getPrefix(buildflags);
3140 const Context& ctx = Context::getDefault();
3141 const Device& dev = Device::getDefault();
3142 const char* pos0 = _buf.c_str();
3143 const char* pos1 = strchr(pos0, '\n');
3146 const char* pos2 = strchr(pos1+1, '\n');
3149 const char* pos3 = strchr(pos2+1, '\n');
3152 size_t prefixlen = (pos3 - pos0)+1;
3153 String prefix(pos0, prefixlen);
3154 if( prefix != prefix0 )
3156 const uchar* bin = (uchar*)(pos3+1);
3157 void* devid = dev.ptr();
3158 size_t codelen = _buf.length() - prefixlen;
3159 cl_int binstatus = 0, retval = 0;
3160 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
3161 &codelen, &bin, &binstatus, &retval);
3162 CV_OclDbgAssert(retval == CL_SUCCESS);
3169 size_t progsz = 0, retsz = 0;
3170 String prefix = Program::getPrefix(buildflags);
3171 size_t prefixlen = prefix.length();
3172 if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) != CL_SUCCESS)
3174 AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
3175 uchar* buf = bufbuf;
3176 memcpy(buf, prefix.c_str(), prefixlen);
3178 if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) != CL_SUCCESS)
3180 buf[progsz] = (uchar)'\0';
3181 return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
3189 if (!cv::__termination)
3192 clReleaseProgram(handle);
3198 IMPLEMENT_REFCOUNTABLE();
3206 Program::Program() { p = 0; }
3208 Program::Program(const ProgramSource& src,
3209 const String& buildflags, String& errmsg)
3212 create(src, buildflags, errmsg);
3215 Program::Program(const Program& prog)
3222 Program& Program::operator = (const Program& prog)
3224 Impl* newp = (Impl*)prog.p;
3239 bool Program::create(const ProgramSource& src,
3240 const String& buildflags, String& errmsg)
3244 p = new Impl(src, buildflags, errmsg);
3253 const ProgramSource& Program::source() const
3255 static ProgramSource dummy;
3256 return p ? p->src : dummy;
3259 void* Program::ptr() const
3261 return p ? p->handle : 0;
3264 bool Program::read(const String& bin, const String& buildflags)
3268 p = new Impl(bin, buildflags);
3269 return p->handle != 0;
3272 bool Program::write(String& bin) const
3277 return !bin.empty();
3280 String Program::getPrefix() const
3284 return getPrefix(p->buildflags);
3287 String Program::getPrefix(const String& buildflags)
3289 const Context& ctx = Context::getDefault();
3290 const Device& dev = ctx.device(0);
3291 return format("name=%s\ndriver=%s\nbuildflags=%s\n",
3292 dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
3295 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3297 struct ProgramSource::Impl
3299 Impl(const char* _src)
3303 Impl(const String& _src)
3307 void init(const String& _src)
3311 h = crc64((uchar*)src.c_str(), src.size());
3314 IMPLEMENT_REFCOUNTABLE();
3316 ProgramSource::hash_t h;
3320 ProgramSource::ProgramSource()
3325 ProgramSource::ProgramSource(const char* prog)
3330 ProgramSource::ProgramSource(const String& prog)
3335 ProgramSource::~ProgramSource()
3341 ProgramSource::ProgramSource(const ProgramSource& prog)
3348 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
3350 Impl* newp = (Impl*)prog.p;
3359 const String& ProgramSource::source() const
3361 static String dummy;
3362 return p ? p->src : dummy;
3365 ProgramSource::hash_t ProgramSource::hash() const
3367 return p ? p->h : 0;
3370 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
3372 class OpenCLBufferPool
3375 ~OpenCLBufferPool() { }
3377 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;
3378 virtual void release(cl_mem handle, size_t capacity) = 0;
3381 class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
3392 size_t currentReservedSize;
3393 size_t maxReservedSize;
3395 std::list<BufferEntry> reservedEntries_; // LRU order
3398 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
3400 if (reservedEntries_.empty())
3402 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3403 std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
3404 BufferEntry result = {NULL, 0};
3405 size_t minDiff = (size_t)(-1);
3406 for (; i != reservedEntries_.end(); ++i)
3408 BufferEntry& e = *i;
3409 if (e.capacity_ >= size)
3411 size_t diff = e.capacity_ - size;
3412 if (diff < size / 8 && (result_pos == reservedEntries_.end() || diff < minDiff))
3422 if (result_pos != reservedEntries_.end())
3424 //CV_DbgAssert(result == *result_pos);
3425 reservedEntries_.erase(result_pos);
3427 currentReservedSize -= entry.capacity_;
3434 void _checkSizeOfReservedEntries()
3436 while (currentReservedSize > maxReservedSize)
3438 CV_DbgAssert(!reservedEntries_.empty());
3439 const BufferEntry& entry = reservedEntries_.back();
3440 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3441 currentReservedSize -= entry.capacity_;
3442 _releaseBufferEntry(entry);
3443 reservedEntries_.pop_back();
3447 inline size_t _allocationGranularity(size_t size)
3452 else if (size < 64*1024)
3454 else if (size < 1024*1024)
3456 else if (size < 16*1024*1024)
3462 void _allocateBufferEntry(BufferEntry& entry, size_t size)
3464 CV_DbgAssert(entry.clBuffer_ == NULL);
3465 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
3466 Context& ctx = Context::getDefault();
3467 cl_int retval = CL_SUCCESS;
3468 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
3469 CV_Assert(retval == CL_SUCCESS);
3470 CV_Assert(entry.clBuffer_ != NULL);
3471 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
3472 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
3475 void _releaseBufferEntry(const BufferEntry& entry)
3477 CV_Assert(entry.capacity_ != 0);
3478 CV_Assert(entry.clBuffer_ != NULL);
3479 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
3480 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
3481 clReleaseMemObject(entry.clBuffer_);
3484 OpenCLBufferPoolImpl()
3485 : currentReservedSize(0), maxReservedSize(0)
3487 // Note: Buffer pool is disabled by default,
3488 // because we didn't receive significant performance improvement
3489 maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", 0);
3491 virtual ~OpenCLBufferPoolImpl()
3493 freeAllReservedBuffers();
3494 CV_Assert(reservedEntries_.empty());
3497 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
3499 BufferEntry entry = {NULL, 0};
3500 if (maxReservedSize > 0)
3502 AutoLock locker(mutex_);
3503 if (_findAndRemoveEntryFromReservedList(entry, size))
3505 CV_DbgAssert(size <= entry.capacity_);
3506 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
3507 capacity = entry.capacity_;
3508 return entry.clBuffer_;
3511 _allocateBufferEntry(entry, size);
3512 capacity = entry.capacity_;
3513 return entry.clBuffer_;
3515 virtual void release(cl_mem handle, size_t capacity)
3517 BufferEntry entry = {handle, capacity};
3518 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
3520 _releaseBufferEntry(entry);
3524 AutoLock locker(mutex_);
3525 reservedEntries_.push_front(entry);
3526 currentReservedSize += entry.capacity_;
3527 _checkSizeOfReservedEntries();
3531 virtual size_t getReservedSize() const { return currentReservedSize; }
3532 virtual size_t getMaxReservedSize() const { return maxReservedSize; }
3533 virtual void setMaxReservedSize(size_t size)
3535 AutoLock locker(mutex_);
3536 size_t oldMaxReservedSize = maxReservedSize;
3537 maxReservedSize = size;
3538 if (maxReservedSize < oldMaxReservedSize)
3540 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3541 for (; i != reservedEntries_.end();)
3543 const BufferEntry& entry = *i;
3544 if (entry.capacity_ > maxReservedSize / 8)
3546 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3547 currentReservedSize -= entry.capacity_;
3548 _releaseBufferEntry(entry);
3549 i = reservedEntries_.erase(i);
3554 _checkSizeOfReservedEntries();
3557 virtual void freeAllReservedBuffers()
3559 AutoLock locker(mutex_);
3560 std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
3561 for (; i != reservedEntries_.end(); ++i)
3563 const BufferEntry& entry = *i;
3564 _releaseBufferEntry(entry);
3566 reservedEntries_.clear();
3570 #if defined _MSC_VER
3571 #pragma warning(disable:4127) // conditional expression is constant
3573 template <bool readAccess, bool writeAccess>
3574 class AlignedDataPtr
3578 uchar* const originPtr_;
3579 const size_t alignment_;
3581 uchar* allocatedPtr_;
3584 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
3585 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
3587 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
3588 if (((size_t)ptr_ & (alignment - 1)) != 0)
3590 allocatedPtr_ = new uchar[size_ + alignment - 1];
3591 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
3594 memcpy(ptr_, originPtr_, size_);
3599 uchar* getAlignedPtr() const
3601 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
3611 memcpy(originPtr_, ptr_, size_);
3613 delete[] allocatedPtr_;
3614 allocatedPtr_ = NULL;
3619 AlignedDataPtr(const AlignedDataPtr&); // disabled
3620 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
3622 #if defined _MSC_VER
3623 #pragma warning(default:4127) // conditional expression is constant
3626 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
3627 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
3630 class OpenCLAllocator : public MatAllocator
3632 mutable OpenCLBufferPoolImpl bufferPool;
3635 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0
3638 OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
3640 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
3641 int flags, UMatUsageFlags usageFlags) const
3643 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
3647 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
3649 const Device& dev = ctx.device(0);
3651 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
3652 createFlags |= CL_MEM_ALLOC_HOST_PTR;
3654 if( dev.hostUnifiedMemory() )
3657 flags0 = UMatData::COPY_ON_MAP;
3660 UMatData* allocate(int dims, const int* sizes, int type,
3661 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
3664 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3665 CV_Assert(data == 0);
3666 size_t total = CV_ELEM_SIZE(type);
3667 for( int i = dims-1; i >= 0; i-- )
3674 Context& ctx = Context::getDefault();
3675 int createFlags = 0, flags0 = 0;
3676 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
3678 size_t capacity = 0;
3679 void* handle = NULL;
3680 int allocatorFlags = 0;
3681 if (createFlags == 0)
3683 handle = bufferPool.allocate(total, capacity);
3685 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3686 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
3692 handle = clCreateBuffer((cl_context)ctx.ptr(),
3693 CL_MEM_READ_WRITE|createFlags, total, 0, &retval);
3694 if( !handle || retval != CL_SUCCESS )
3695 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3697 UMatData* u = new UMatData(this);
3700 u->capacity = capacity;
3703 u->allocatorFlags_ = allocatorFlags;
3704 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
3708 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
3713 UMatDataAutoLock lock(u);
3717 CV_Assert(u->origdata != 0);
3718 Context& ctx = Context::getDefault();
3719 int createFlags = 0, flags0 = 0;
3720 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
3722 cl_context ctx_handle = (cl_context)ctx.ptr();
3724 int tempUMatFlags = UMatData::TEMP_UMAT;
3725 u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|CL_MEM_READ_WRITE,
3726 u->size, u->origdata, &retval);
3727 if((!u->handle || retval != CL_SUCCESS) && !(accessFlags & ACCESS_FAST))
3729 u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
3730 u->size, u->origdata, &retval);
3731 tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
3733 if(!u->handle || retval != CL_SUCCESS)
3735 u->prevAllocator = u->currAllocator;
3736 u->currAllocator = this;
3737 u->flags |= tempUMatFlags;
3739 if(accessFlags & ACCESS_WRITE)
3740 u->markHostCopyObsolete(true);
3744 /*void sync(UMatData* u) const
3746 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3747 UMatDataAutoLock lock(u);
3749 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
3751 if( u->tempCopiedUMat() )
3753 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3754 u->size, u->origdata, 0, 0, 0);
3759 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3760 (CL_MAP_READ | CL_MAP_WRITE),
3761 0, u->size, 0, 0, 0, &retval);
3762 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
3765 u->markHostCopyObsolete(false);
3767 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
3769 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3770 u->size, u->data, 0, 0, 0);
3774 void deallocate(UMatData* u) const
3779 CV_Assert(u->urefcount >= 0);
3780 CV_Assert(u->refcount >= 0);
3782 // TODO: !!! when we add Shared Virtual Memory Support,
3783 // this function (as well as the others) should be corrected
3784 CV_Assert(u->handle != 0 && u->urefcount == 0);
3787 // UMatDataAutoLock lock(u);
3788 if( u->hostCopyObsolete() && u->refcount > 0 )
3790 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3791 if( u->tempCopiedUMat() )
3793 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3794 CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3795 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
3800 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3801 (CL_MAP_READ | CL_MAP_WRITE),
3802 0, u->size, 0, 0, 0, &retval);
3803 CV_OclDbgAssert(retval == CL_SUCCESS);
3804 CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
3805 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3808 u->markHostCopyObsolete(false);
3809 clReleaseMemObject((cl_mem)u->handle);
3811 u->currAllocator = u->prevAllocator;
3812 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3814 u->data = u->origdata;
3815 if(u->refcount == 0)
3816 u->currAllocator->deallocate(u);
3820 CV_Assert(u->refcount == 0);
3821 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3826 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
3828 bufferPool.release((cl_mem)u->handle, u->capacity);
3832 clReleaseMemObject((cl_mem)u->handle);
3840 void map(UMatData* u, int accessFlags) const
3845 CV_Assert( u->handle != 0 );
3847 UMatDataAutoLock autolock(u);
3849 if(accessFlags & ACCESS_WRITE)
3850 u->markDeviceCopyObsolete(true);
3852 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3854 // FIXIT Workaround for UMat synchronization issue
3855 // if( u->refcount == 0 )
3857 if( !u->copyOnMap() )
3859 if (u->data) // FIXIT Workaround for UMat synchronization issue
3861 //CV_Assert(u->hostCopyObsolete() == false);
3864 // because there can be other map requests for the same UMat with different access flags,
3865 // we use the universal (read-write) access mode.
3867 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3868 (CL_MAP_READ | CL_MAP_WRITE),
3869 0, u->size, 0, 0, 0, &retval);
3870 if(u->data && retval == CL_SUCCESS)
3872 u->markHostCopyObsolete(false);
3876 // if map failed, switch to copy-on-map mode for the particular buffer
3877 u->flags |= UMatData::COPY_ON_MAP;
3882 u->data = (uchar*)fastMalloc(u->size);
3883 u->markHostCopyObsolete(true);
3887 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
3889 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3890 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3891 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
3892 u->markHostCopyObsolete(false);
3896 void unmap(UMatData* u) const
3901 CV_Assert(u->handle != 0);
3903 UMatDataAutoLock autolock(u);
3905 // FIXIT Workaround for UMat synchronization issue
3909 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3911 if( !u->copyOnMap() && u->data )
3913 CV_Assert( (retval = clEnqueueUnmapMemObject(q,
3914 (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
3915 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3918 else if( u->copyOnMap() && u->deviceCopyObsolete() )
3920 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3921 CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3922 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
3924 u->markDeviceCopyObsolete(false);
3925 u->markHostCopyObsolete(false);
3928 bool checkContinuous(int dims, const size_t sz[],
3929 const size_t srcofs[], const size_t srcstep[],
3930 const size_t dstofs[], const size_t dststep[],
3931 size_t& total, size_t new_sz[],
3932 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
3933 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
3935 bool iscontinuous = true;
3936 srcrawofs = srcofs ? srcofs[dims-1] : 0;
3937 dstrawofs = dstofs ? dstofs[dims-1] : 0;
3939 for( int i = dims-2; i >= 0; i-- )
3941 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
3942 iscontinuous = false;
3945 srcrawofs += srcofs[i]*srcstep[i];
3947 dstrawofs += dstofs[i]*dststep[i];
3952 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
3955 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
3956 // we assume that new_... arrays are initialized by caller
3957 // with 0's, so there is no else branch
3960 new_srcofs[0] = srcofs[1];
3961 new_srcofs[1] = srcofs[0];
3967 new_dstofs[0] = dstofs[1];
3968 new_dstofs[1] = dstofs[0];
3972 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
3973 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
3977 // we could check for dims == 3 here,
3978 // but from user perspective this one is more informative
3979 CV_Assert(dims <= 3);
3980 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
3983 new_srcofs[0] = srcofs[2];
3984 new_srcofs[1] = srcofs[1];
3985 new_srcofs[2] = srcofs[0];
3990 new_dstofs[0] = dstofs[2];
3991 new_dstofs[1] = dstofs[1];
3992 new_dstofs[2] = dstofs[0];
3995 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
3996 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
3999 return iscontinuous;
4002 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
4003 const size_t srcofs[], const size_t srcstep[],
4004 const size_t dststep[]) const
4008 UMatDataAutoLock autolock(u);
4010 if( u->data && !u->hostCopyObsolete() )
4012 Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
4015 CV_Assert( u->handle != 0 );
4017 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4019 size_t total = 0, new_sz[] = {0, 0, 0};
4020 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4021 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4023 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
4025 srcrawofs, new_srcofs, new_srcstep,
4026 dstrawofs, new_dstofs, new_dststep);
4028 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4031 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
4032 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4036 CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4037 new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
4038 new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4042 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
4043 const size_t dstofs[], const size_t dststep[],
4044 const size_t srcstep[]) const
4049 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4050 CV_Assert(u->refcount == 0 || u->tempUMat());
4052 size_t total = 0, new_sz[] = {0, 0, 0};
4053 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4054 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4056 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
4058 srcrawofs, new_srcofs, new_srcstep,
4059 dstrawofs, new_dstofs, new_dststep);
4061 UMatDataAutoLock autolock(u);
4063 // if there is cached CPU copy of the GPU matrix,
4064 // we could use it as a destination.
4065 // we can do it in 2 cases:
4066 // 1. we overwrite the whole content
4067 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
4068 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
4070 Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
4071 u->markHostCopyObsolete(false);
4072 u->markDeviceCopyObsolete(true);
4076 CV_Assert( u->handle != 0 );
4077 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4079 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4082 CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
4083 CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) == CL_SUCCESS );
4087 CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4088 new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
4089 new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) == CL_SUCCESS );
4092 u->markHostCopyObsolete(true);
4093 u->markDeviceCopyObsolete(false);
4096 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
4097 const size_t srcofs[], const size_t srcstep[],
4098 const size_t dstofs[], const size_t dststep[], bool _sync) const
4103 size_t total = 0, new_sz[] = {0, 0, 0};
4104 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4105 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4107 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
4109 srcrawofs, new_srcofs, new_srcstep,
4110 dstrawofs, new_dstofs, new_dststep);
4112 UMatDataAutoLock src_autolock(src);
4113 UMatDataAutoLock dst_autolock(dst);
4115 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
4117 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
4120 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
4122 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
4123 dst->markHostCopyObsolete(false);
4124 dst->markDeviceCopyObsolete(true);
4128 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4129 CV_Assert(dst->refcount == 0);
4130 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4134 CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4135 srcrawofs, dstrawofs, total, 0, 0, 0) == CL_SUCCESS );
4140 CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4141 new_srcofs, new_dstofs, new_sz,
4142 new_srcstep[0], new_srcstep[1],
4143 new_dststep[0], new_dststep[1],
4144 0, 0, 0)) == CL_SUCCESS );
4147 dst->markHostCopyObsolete(true);
4148 dst->markDeviceCopyObsolete(false);
4152 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
4156 BufferPoolController* getBufferPoolController() const { return &bufferPool; }
4158 MatAllocator* matStdAllocator;
4161 MatAllocator* getOpenCLAllocator()
4163 static MatAllocator * allocator = new OpenCLAllocator();
4167 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
4169 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
4171 cl_uint numDevices = 0;
4172 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4173 0, NULL, &numDevices) == CL_SUCCESS);
4175 if (numDevices == 0)
4181 devices.resize((size_t)numDevices);
4182 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4183 numDevices, &devices[0], &numDevices) == CL_SUCCESS);
4186 struct PlatformInfo::Impl
4191 handle = *(cl_platform_id*)id;
4192 getDevices(devices, handle);
4195 String getStrProp(cl_device_info prop) const
4199 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
4200 sz < sizeof(buf) ? String(buf) : String();
4203 IMPLEMENT_REFCOUNTABLE();
4204 std::vector<cl_device_id> devices;
4205 cl_platform_id handle;
4208 PlatformInfo::PlatformInfo()
4213 PlatformInfo::PlatformInfo(void* platform_id)
4215 p = new Impl(platform_id);
4218 PlatformInfo::~PlatformInfo()
4224 PlatformInfo::PlatformInfo(const PlatformInfo& i)
4231 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
4244 int PlatformInfo::deviceNumber() const
4246 return p ? (int)p->devices.size() : 0;
4249 void PlatformInfo::getDevice(Device& device, int d) const
4251 CV_Assert(p && d < (int)p->devices.size() );
4253 device.set(p->devices[d]);
4256 String PlatformInfo::name() const
4258 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
4261 String PlatformInfo::vendor() const
4263 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
4266 String PlatformInfo::version() const
4268 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
4271 static void getPlatforms(std::vector<cl_platform_id>& platforms)
4273 cl_uint numPlatforms = 0;
4274 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
4276 if (numPlatforms == 0)
4282 platforms.resize((size_t)numPlatforms);
4283 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
4286 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
4288 std::vector<cl_platform_id> platforms;
4289 getPlatforms(platforms);
4291 for (size_t i = 0; i < platforms.size(); i++)
4292 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
4295 const char* typeToStr(int type)
4297 static const char* tab[]=
4299 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4300 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4301 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4302 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4303 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4304 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
4305 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
4306 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4308 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4309 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4312 const char* memopTypeToStr(int type)
4314 static const char* tab[] =
4316 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4317 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4318 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4319 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4320 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4321 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4322 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
4323 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4325 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4326 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4329 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
4331 if( sdepth == ddepth )
4333 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
4334 if( ddepth >= CV_32F ||
4335 (ddepth == CV_32S && sdepth < CV_32S) ||
4336 (ddepth == CV_16S && sdepth <= CV_8S) ||
4337 (ddepth == CV_16U && sdepth == CV_8U))
4339 sprintf(buf, "convert_%s", typestr);
4341 else if( sdepth >= CV_32F )
4342 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
4344 sprintf(buf, "convert_%s_sat", typestr);
4349 template <typename T>
4350 static std::string kerToStr(const Mat & k)
4352 int width = k.cols - 1, depth = k.depth();
4353 const T * const data = reinterpret_cast<const T *>(k.data);
4355 std::ostringstream stream;
4356 stream.precision(10);
4360 for (int i = 0; i < width; ++i)
4361 stream << "DIG(" << (int)data[i] << ")";
4362 stream << "DIG(" << (int)data[width] << ")";
4364 else if (depth == CV_32F)
4366 stream.setf(std::ios_base::showpoint);
4367 for (int i = 0; i < width; ++i)
4368 stream << "DIG(" << data[i] << "f)";
4369 stream << "DIG(" << data[width] << "f)";
4373 for (int i = 0; i < width; ++i)
4374 stream << "DIG(" << data[i] << ")";
4375 stream << "DIG(" << data[width] << ")";
4378 return stream.str();
4381 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
4383 Mat kernel = _kernel.getMat().reshape(1, 1);
4385 int depth = kernel.depth();
4389 if (ddepth != depth)
4390 kernel.convertTo(kernel, ddepth);
4392 typedef std::string (* func_t)(const Mat &);
4393 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
4394 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
4395 const func_t func = funcs[ddepth];
4396 CV_Assert(func != 0);
4398 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
4401 #define PROCESS_SRC(src) \
4406 CV_Assert(src.isMat() || src.isUMat()); \
4407 int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
4408 Size csize = src.size(); \
4409 cols.push_back(ccn * csize.width); \
4410 if (ctype != type) \
4412 offsets.push_back(src.offset()); \
4413 steps.push_back(src.step()); \
4418 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
4419 InputArray src4, InputArray src5, InputArray src6,
4420 InputArray src7, InputArray src8, InputArray src9)
4422 int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz1 = CV_ELEM_SIZE1(depth);
4423 Size ssize = src1.size();
4424 const ocl::Device & d = ocl::Device::getDefault();
4426 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
4427 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
4428 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
4429 d.preferredVectorWidthDouble(), -1 }, kercn = vectorWidths[depth];
4433 int vectorWidthsIntel[] = { 16, 16, 8, 8, 1, 1, 1, -1 };
4434 kercn = vectorWidthsIntel[depth];
4437 if (ssize.width * cn < kercn || kercn <= 0)
4440 std::vector<size_t> offsets, steps, cols;
4451 size_t size = offsets.size();
4452 int wsz = kercn * esz1;
4453 std::vector<int> dividers(size, wsz);
4455 for (size_t i = 0; i < size; ++i)
4456 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
4460 for (size_t i = 0; i < size; ++i)
4461 if (dividers[i] != wsz)
4468 // width = *std::min_element(dividers.begin(), dividers.end());
4476 // TODO Make this as a method of OpenCL "BuildOptions" class
4477 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
4479 if (!buildOptions.empty())
4480 buildOptions += " ";
4481 int type = _m.type(), depth = CV_MAT_DEPTH(type);
4482 buildOptions += format(
4483 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
4484 name.c_str(), ocl::typeToStr(type),
4485 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
4486 name.c_str(), (int)CV_MAT_CN(type),
4487 name.c_str(), (int)CV_ELEM_SIZE(type),
4488 name.c_str(), (int)CV_ELEM_SIZE1(type),
4489 name.c_str(), (int)depth
4494 struct Image2D::Impl
4496 Impl(const UMat &src, bool norm, bool alias)
4500 init(src, norm, alias);
4506 clReleaseMemObject(handle);
4509 static cl_image_format getImageFormat(int depth, int cn, bool norm)
4511 cl_image_format format;
4512 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
4513 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
4514 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
4515 CL_SNORM_INT16, -1, -1, -1, -1 };
4516 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
4518 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
4519 int channelOrder = channelOrders[cn];
4520 format.image_channel_data_type = (cl_channel_type)channelType;
4521 format.image_channel_order = (cl_channel_order)channelOrder;
4525 static bool isFormatSupported(cl_image_format format)
4527 cl_context context = (cl_context)Context::getDefault().ptr();
4528 // Figure out how many formats are supported by this context.
4529 cl_uint numFormats = 0;
4530 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4531 CL_MEM_OBJECT_IMAGE2D, numFormats,
4533 AutoBuffer<cl_image_format> formats(numFormats);
4534 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4535 CL_MEM_OBJECT_IMAGE2D, numFormats,
4537 CV_OclDbgAssert(err == CL_SUCCESS);
4538 for (cl_uint i = 0; i < numFormats; ++i)
4540 if (!memcmp(&formats[i], &format, sizeof(format)))
4548 void init(const UMat &src, bool norm, bool alias)
4550 CV_Assert(ocl::Device::getDefault().imageSupport());
4552 int err, depth = src.depth(), cn = src.channels();
4554 cl_image_format format = getImageFormat(depth, cn, norm);
4556 if (!isFormatSupported(format))
4557 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
4559 cl_context context = (cl_context)Context::getDefault().ptr();
4560 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
4562 #ifdef CL_VERSION_1_2
4563 // this enables backwards portability to
4564 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
4565 const Device & d = ocl::Device::getDefault();
4566 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
4567 CV_Assert(!alias || canCreateAlias(src));
4568 if (1 < major || (1 == major && 2 <= minor))
4571 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
4572 desc.image_width = src.cols;
4573 desc.image_height = src.rows;
4574 desc.image_depth = 0;
4575 desc.image_array_size = 1;
4576 desc.image_row_pitch = alias ? src.step[0] : 0;
4577 desc.image_slice_pitch = 0;
4578 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
4579 desc.num_mip_levels = 0;
4580 desc.num_samples = 0;
4581 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
4586 CV_SUPPRESS_DEPRECATED_START
4587 CV_Assert(!alias); // This is an OpenCL 1.2 extension
4588 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
4589 CV_SUPPRESS_DEPRECATED_END
4591 CV_OclDbgAssert(err == CL_SUCCESS);
4593 size_t origin[] = { 0, 0, 0 };
4594 size_t region[] = { src.cols, src.rows, 1 };
4597 if (!alias && !src.isContinuous())
4599 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
4600 CV_OclDbgAssert(err == CL_SUCCESS);
4602 const size_t roi[3] = {src.cols * src.elemSize(), src.rows, 1};
4603 CV_Assert(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
4604 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL) == CL_SUCCESS);
4605 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4609 devData = (cl_mem)src.handle(ACCESS_READ);
4611 CV_Assert(devData != NULL);
4615 CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS);
4616 if (!src.isContinuous())
4618 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4619 CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS);
4624 IMPLEMENT_REFCOUNTABLE();
4634 Image2D::Image2D(const UMat &src, bool norm, bool alias)
4636 p = new Impl(src, norm, alias);
4639 bool Image2D::canCreateAlias(const UMat &m)
4642 const Device & d = ocl::Device::getDefault();
4643 if (d.imageFromBufferSupport())
4645 // This is the required pitch alignment in pixels
4646 uint pitchAlign = d.imagePitchAlignment();
4647 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
4649 // We don't currently handle the case where the buffer was created
4650 // with CL_MEM_USE_HOST_PTR
4651 if (!m.u->tempUMat())
4660 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
4662 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
4664 return Impl::isFormatSupported(format);
4667 Image2D::Image2D(const Image2D & i)
4674 Image2D & Image2D::operator = (const Image2D & i)
4693 void* Image2D::ptr() const
4695 return p ? p->handle : 0;