1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
47 #include <iostream> // std::cerr
49 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
51 #include "opencv2/core/bufferpool.hpp"
52 #ifndef LOG_BUFFER_POOL
54 # define LOG_BUFFER_POOL printf
56 # define LOG_BUFFER_POOL(...)
61 // TODO Move to some common place
62 static bool getBoolParameter(const char* name, bool defaultValue)
64 const char* envValue = getenv(name);
69 cv::String value = envValue;
70 if (value == "1" || value == "True" || value == "true" || value == "TRUE")
74 if (value == "0" || value == "False" || value == "false" || value == "FALSE")
78 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
82 // TODO Move to some common place
83 static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
86 const char* envValue = NULL;
88 const char* envValue = getenv(name);
94 cv::String value = envValue;
96 for (; pos < value.size(); pos++)
98 if (!isdigit(value[pos]))
101 cv::String valueStr = value.substr(0, pos);
102 cv::String suffixStr = value.substr(pos, value.length() - pos);
103 int v = atoi(valueStr.c_str());
104 if (suffixStr.length() == 0)
106 else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
107 return v * 1024 * 1024;
108 else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
110 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
113 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
114 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
117 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
119 // TODO FIXIT: This file can't be build without OPENCL
122 Part of the file is an extract from the standard OpenCL headers from Khronos site.
123 Below is the original copyright.
126 /*******************************************************************************
127 * Copyright (c) 2008 - 2012 The Khronos Group Inc.
129 * Permission is hereby granted, free of charge, to any person obtaining a
130 * copy of this software and/or associated documentation files (the
131 * "Materials"), to deal in the Materials without restriction, including
132 * without limitation the rights to use, copy, modify, merge, publish,
133 * distribute, sublicense, and/or sell copies of the Materials, and to
134 * permit persons to whom the Materials are furnished to do so, subject to
135 * the following conditions:
137 * The above copyright notice and this permission notice shall be included
138 * in all copies or substantial portions of the Materials.
140 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
141 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
142 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
143 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
144 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
145 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
146 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
147 ******************************************************************************/
149 #if 0 //defined __APPLE__
150 #define HAVE_OPENCL 1
155 #define OPENCV_CL_NOT_IMPLEMENTED -1000
159 #if defined __APPLE__
160 #include <OpenCL/opencl.h>
162 #include <CL/opencl.h>
165 static const bool g_haveOpenCL = true;
171 struct _cl_platform_id { int dummy; };
172 struct _cl_device_id { int dummy; };
173 struct _cl_context { int dummy; };
174 struct _cl_command_queue { int dummy; };
175 struct _cl_mem { int dummy; };
176 struct _cl_program { int dummy; };
177 struct _cl_kernel { int dummy; };
178 struct _cl_event { int dummy; };
179 struct _cl_sampler { int dummy; };
181 typedef struct _cl_platform_id * cl_platform_id;
182 typedef struct _cl_device_id * cl_device_id;
183 typedef struct _cl_context * cl_context;
184 typedef struct _cl_command_queue * cl_command_queue;
185 typedef struct _cl_mem * cl_mem;
186 typedef struct _cl_program * cl_program;
187 typedef struct _cl_kernel * cl_kernel;
188 typedef struct _cl_event * cl_event;
189 typedef struct _cl_sampler * cl_sampler;
192 typedef unsigned cl_uint;
193 #if defined (_WIN32) && defined(_MSC_VER)
194 typedef __int64 cl_long;
195 typedef unsigned __int64 cl_ulong;
197 typedef long cl_long;
198 typedef unsigned long cl_ulong;
201 typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
202 typedef cl_ulong cl_bitfield;
203 typedef cl_bitfield cl_device_type;
204 typedef cl_uint cl_platform_info;
205 typedef cl_uint cl_device_info;
206 typedef cl_bitfield cl_device_fp_config;
207 typedef cl_uint cl_device_mem_cache_type;
208 typedef cl_uint cl_device_local_mem_type;
209 typedef cl_bitfield cl_device_exec_capabilities;
210 typedef cl_bitfield cl_command_queue_properties;
211 typedef intptr_t cl_device_partition_property;
212 typedef cl_bitfield cl_device_affinity_domain;
214 typedef intptr_t cl_context_properties;
215 typedef cl_uint cl_context_info;
216 typedef cl_uint cl_command_queue_info;
217 typedef cl_uint cl_channel_order;
218 typedef cl_uint cl_channel_type;
219 typedef cl_bitfield cl_mem_flags;
220 typedef cl_uint cl_mem_object_type;
221 typedef cl_uint cl_mem_info;
222 typedef cl_bitfield cl_mem_migration_flags;
223 typedef cl_uint cl_image_info;
224 typedef cl_uint cl_buffer_create_type;
225 typedef cl_uint cl_addressing_mode;
226 typedef cl_uint cl_filter_mode;
227 typedef cl_uint cl_sampler_info;
228 typedef cl_bitfield cl_map_flags;
229 typedef cl_uint cl_program_info;
230 typedef cl_uint cl_program_build_info;
231 typedef cl_uint cl_program_binary_type;
232 typedef cl_int cl_build_status;
233 typedef cl_uint cl_kernel_info;
234 typedef cl_uint cl_kernel_arg_info;
235 typedef cl_uint cl_kernel_arg_address_qualifier;
236 typedef cl_uint cl_kernel_arg_access_qualifier;
237 typedef cl_bitfield cl_kernel_arg_type_qualifier;
238 typedef cl_uint cl_kernel_work_group_info;
239 typedef cl_uint cl_event_info;
240 typedef cl_uint cl_command_type;
241 typedef cl_uint cl_profiling_info;
244 typedef struct _cl_image_format {
245 cl_channel_order image_channel_order;
246 cl_channel_type image_channel_data_type;
249 typedef struct _cl_image_desc {
250 cl_mem_object_type image_type;
254 size_t image_array_size;
255 size_t image_row_pitch;
256 size_t image_slice_pitch;
257 cl_uint num_mip_levels;
262 typedef struct _cl_buffer_region {
268 //////////////////////////////////////////////////////////
271 #define CL_DEVICE_NOT_FOUND -1
272 #define CL_DEVICE_NOT_AVAILABLE -2
273 #define CL_COMPILER_NOT_AVAILABLE -3
274 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
275 #define CL_OUT_OF_RESOURCES -5
276 #define CL_OUT_OF_HOST_MEMORY -6
277 #define CL_PROFILING_INFO_NOT_AVAILABLE -7
278 #define CL_MEM_COPY_OVERLAP -8
279 #define CL_IMAGE_FORMAT_MISMATCH -9
280 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
281 #define CL_BUILD_PROGRAM_FAILURE -11
282 #define CL_MAP_FAILURE -12
283 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
284 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
285 #define CL_COMPILE_PROGRAM_FAILURE -15
286 #define CL_LINKER_NOT_AVAILABLE -16
287 #define CL_LINK_PROGRAM_FAILURE -17
288 #define CL_DEVICE_PARTITION_FAILED -18
289 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
291 #define CL_INVALID_VALUE -30
292 #define CL_INVALID_DEVICE_TYPE -31
293 #define CL_INVALID_PLATFORM -32
294 #define CL_INVALID_DEVICE -33
295 #define CL_INVALID_CONTEXT -34
296 #define CL_INVALID_QUEUE_PROPERTIES -35
297 #define CL_INVALID_COMMAND_QUEUE -36
298 #define CL_INVALID_HOST_PTR -37
299 #define CL_INVALID_MEM_OBJECT -38
300 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
301 #define CL_INVALID_IMAGE_SIZE -40
302 #define CL_INVALID_SAMPLER -41
303 #define CL_INVALID_BINARY -42
304 #define CL_INVALID_BUILD_OPTIONS -43
305 #define CL_INVALID_PROGRAM -44
306 #define CL_INVALID_PROGRAM_EXECUTABLE -45
307 #define CL_INVALID_KERNEL_NAME -46
308 #define CL_INVALID_KERNEL_DEFINITION -47
309 #define CL_INVALID_KERNEL -48
310 #define CL_INVALID_ARG_INDEX -49
311 #define CL_INVALID_ARG_VALUE -50
312 #define CL_INVALID_ARG_SIZE -51
313 #define CL_INVALID_KERNEL_ARGS -52
314 #define CL_INVALID_WORK_DIMENSION -53
315 #define CL_INVALID_WORK_GROUP_SIZE -54
316 #define CL_INVALID_WORK_ITEM_SIZE -55
317 #define CL_INVALID_GLOBAL_OFFSET -56
318 #define CL_INVALID_EVENT_WAIT_LIST -57
319 #define CL_INVALID_EVENT -58
320 #define CL_INVALID_OPERATION -59
321 #define CL_INVALID_GL_OBJECT -60
322 #define CL_INVALID_BUFFER_SIZE -61
323 #define CL_INVALID_MIP_LEVEL -62
324 #define CL_INVALID_GLOBAL_WORK_SIZE -63
325 #define CL_INVALID_PROPERTY -64
326 #define CL_INVALID_IMAGE_DESCRIPTOR -65
327 #define CL_INVALID_COMPILER_OPTIONS -66
328 #define CL_INVALID_LINKER_OPTIONS -67
329 #define CL_INVALID_DEVICE_PARTITION_COUNT -68
331 /*#define CL_VERSION_1_0 1
332 #define CL_VERSION_1_1 1
333 #define CL_VERSION_1_2 1*/
337 #define CL_BLOCKING CL_TRUE
338 #define CL_NON_BLOCKING CL_FALSE
340 #define CL_PLATFORM_PROFILE 0x0900
341 #define CL_PLATFORM_VERSION 0x0901
342 #define CL_PLATFORM_NAME 0x0902
343 #define CL_PLATFORM_VENDOR 0x0903
344 #define CL_PLATFORM_EXTENSIONS 0x0904
346 #define CL_DEVICE_TYPE_DEFAULT (1 << 0)
347 #define CL_DEVICE_TYPE_CPU (1 << 1)
348 #define CL_DEVICE_TYPE_GPU (1 << 2)
349 #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
350 #define CL_DEVICE_TYPE_CUSTOM (1 << 4)
351 #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
352 #define CL_DEVICE_TYPE 0x1000
353 #define CL_DEVICE_VENDOR_ID 0x1001
354 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
355 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
356 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
357 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
358 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
359 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
360 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
361 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
362 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
363 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
364 #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
365 #define CL_DEVICE_ADDRESS_BITS 0x100D
366 #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
367 #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
368 #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
369 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
370 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
371 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
372 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
373 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
374 #define CL_DEVICE_IMAGE_SUPPORT 0x1016
375 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
376 #define CL_DEVICE_MAX_SAMPLERS 0x1018
377 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
378 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
379 #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
380 #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
381 #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
382 #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
383 #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
384 #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
385 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
386 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
387 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
388 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
389 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
390 #define CL_DEVICE_ENDIAN_LITTLE 0x1026
391 #define CL_DEVICE_AVAILABLE 0x1027
392 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028
393 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
394 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A
395 #define CL_DEVICE_NAME 0x102B
396 #define CL_DEVICE_VENDOR 0x102C
397 #define CL_DRIVER_VERSION 0x102D
398 #define CL_DEVICE_PROFILE 0x102E
399 #define CL_DEVICE_VERSION 0x102F
400 #define CL_DEVICE_EXTENSIONS 0x1030
401 #define CL_DEVICE_PLATFORM 0x1031
402 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
403 #define CL_DEVICE_HALF_FP_CONFIG 0x1033
404 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
405 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
406 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
407 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
408 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
409 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
410 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
411 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
412 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
413 #define CL_DEVICE_OPENCL_C_VERSION 0x103D
414 #define CL_DEVICE_LINKER_AVAILABLE 0x103E
415 #define CL_DEVICE_BUILT_IN_KERNELS 0x103F
416 #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
417 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
418 #define CL_DEVICE_PARENT_DEVICE 0x1042
419 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
420 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044
421 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
422 #define CL_DEVICE_PARTITION_TYPE 0x1046
423 #define CL_DEVICE_REFERENCE_COUNT 0x1047
424 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
425 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
426 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
427 #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
429 #define CL_FP_DENORM (1 << 0)
430 #define CL_FP_INF_NAN (1 << 1)
431 #define CL_FP_ROUND_TO_NEAREST (1 << 2)
432 #define CL_FP_ROUND_TO_ZERO (1 << 3)
433 #define CL_FP_ROUND_TO_INF (1 << 4)
434 #define CL_FP_FMA (1 << 5)
435 #define CL_FP_SOFT_FLOAT (1 << 6)
436 #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
439 #define CL_READ_ONLY_CACHE 0x1
440 #define CL_READ_WRITE_CACHE 0x2
442 #define CL_GLOBAL 0x2
443 #define CL_EXEC_KERNEL (1 << 0)
444 #define CL_EXEC_NATIVE_KERNEL (1 << 1)
445 #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
446 #define CL_QUEUE_PROFILING_ENABLE (1 << 1)
448 #define CL_CONTEXT_REFERENCE_COUNT 0x1080
449 #define CL_CONTEXT_DEVICES 0x1081
450 #define CL_CONTEXT_PROPERTIES 0x1082
451 #define CL_CONTEXT_NUM_DEVICES 0x1083
452 #define CL_CONTEXT_PLATFORM 0x1084
453 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
455 #define CL_DEVICE_PARTITION_EQUALLY 0x1086
456 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
457 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
458 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
459 #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
460 #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
461 #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
462 #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
463 #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
464 #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
465 #define CL_QUEUE_CONTEXT 0x1090
466 #define CL_QUEUE_DEVICE 0x1091
467 #define CL_QUEUE_REFERENCE_COUNT 0x1092
468 #define CL_QUEUE_PROPERTIES 0x1093
469 #define CL_MEM_READ_WRITE (1 << 0)
470 #define CL_MEM_WRITE_ONLY (1 << 1)
471 #define CL_MEM_READ_ONLY (1 << 2)
472 #define CL_MEM_USE_HOST_PTR (1 << 3)
473 #define CL_MEM_ALLOC_HOST_PTR (1 << 4)
474 #define CL_MEM_COPY_HOST_PTR (1 << 5)
476 #define CL_MEM_HOST_WRITE_ONLY (1 << 7)
477 #define CL_MEM_HOST_READ_ONLY (1 << 8)
478 #define CL_MEM_HOST_NO_ACCESS (1 << 9)
479 #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
480 #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
486 #define CL_RGB 0x10B4
487 #define CL_RGBA 0x10B5
488 #define CL_BGRA 0x10B6
489 #define CL_ARGB 0x10B7
490 #define CL_INTENSITY 0x10B8
491 #define CL_LUMINANCE 0x10B9
493 #define CL_RGx 0x10BB
494 #define CL_RGBx 0x10BC
495 #define CL_DEPTH 0x10BD
496 #define CL_DEPTH_STENCIL 0x10BE
498 #define CL_SNORM_INT8 0x10D0
499 #define CL_SNORM_INT16 0x10D1
500 #define CL_UNORM_INT8 0x10D2
501 #define CL_UNORM_INT16 0x10D3
502 #define CL_UNORM_SHORT_565 0x10D4
503 #define CL_UNORM_SHORT_555 0x10D5
504 #define CL_UNORM_INT_101010 0x10D6
505 #define CL_SIGNED_INT8 0x10D7
506 #define CL_SIGNED_INT16 0x10D8
507 #define CL_SIGNED_INT32 0x10D9
508 #define CL_UNSIGNED_INT8 0x10DA
509 #define CL_UNSIGNED_INT16 0x10DB
510 #define CL_UNSIGNED_INT32 0x10DC
511 #define CL_HALF_FLOAT 0x10DD
512 #define CL_FLOAT 0x10DE
513 #define CL_UNORM_INT24 0x10DF
515 #define CL_MEM_OBJECT_BUFFER 0x10F0
516 #define CL_MEM_OBJECT_IMAGE2D 0x10F1
517 #define CL_MEM_OBJECT_IMAGE3D 0x10F2
518 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
519 #define CL_MEM_OBJECT_IMAGE1D 0x10F4
520 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
521 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
523 #define CL_MEM_TYPE 0x1100
524 #define CL_MEM_FLAGS 0x1101
525 #define CL_MEM_SIZE 0x1102
526 #define CL_MEM_HOST_PTR 0x1103
527 #define CL_MEM_MAP_COUNT 0x1104
528 #define CL_MEM_REFERENCE_COUNT 0x1105
529 #define CL_MEM_CONTEXT 0x1106
530 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
531 #define CL_MEM_OFFSET 0x1108
533 #define CL_IMAGE_FORMAT 0x1110
534 #define CL_IMAGE_ELEMENT_SIZE 0x1111
535 #define CL_IMAGE_ROW_PITCH 0x1112
536 #define CL_IMAGE_SLICE_PITCH 0x1113
537 #define CL_IMAGE_WIDTH 0x1114
538 #define CL_IMAGE_HEIGHT 0x1115
539 #define CL_IMAGE_DEPTH 0x1116
540 #define CL_IMAGE_ARRAY_SIZE 0x1117
541 #define CL_IMAGE_BUFFER 0x1118
542 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119
543 #define CL_IMAGE_NUM_SAMPLES 0x111A
545 #define CL_ADDRESS_NONE 0x1130
546 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
547 #define CL_ADDRESS_CLAMP 0x1132
548 #define CL_ADDRESS_REPEAT 0x1133
549 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134
551 #define CL_FILTER_NEAREST 0x1140
552 #define CL_FILTER_LINEAR 0x1141
554 #define CL_SAMPLER_REFERENCE_COUNT 0x1150
555 #define CL_SAMPLER_CONTEXT 0x1151
556 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152
557 #define CL_SAMPLER_ADDRESSING_MODE 0x1153
558 #define CL_SAMPLER_FILTER_MODE 0x1154
560 #define CL_MAP_READ (1 << 0)
561 #define CL_MAP_WRITE (1 << 1)
562 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
564 #define CL_PROGRAM_REFERENCE_COUNT 0x1160
565 #define CL_PROGRAM_CONTEXT 0x1161
566 #define CL_PROGRAM_NUM_DEVICES 0x1162
567 #define CL_PROGRAM_DEVICES 0x1163
568 #define CL_PROGRAM_SOURCE 0x1164
569 #define CL_PROGRAM_BINARY_SIZES 0x1165
570 #define CL_PROGRAM_BINARIES 0x1166
571 #define CL_PROGRAM_NUM_KERNELS 0x1167
572 #define CL_PROGRAM_KERNEL_NAMES 0x1168
573 #define CL_PROGRAM_BUILD_STATUS 0x1181
574 #define CL_PROGRAM_BUILD_OPTIONS 0x1182
575 #define CL_PROGRAM_BUILD_LOG 0x1183
576 #define CL_PROGRAM_BINARY_TYPE 0x1184
577 #define CL_PROGRAM_BINARY_TYPE_NONE 0x0
578 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
579 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
580 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
582 #define CL_BUILD_SUCCESS 0
583 #define CL_BUILD_NONE -1
584 #define CL_BUILD_ERROR -2
585 #define CL_BUILD_IN_PROGRESS -3
587 #define CL_KERNEL_FUNCTION_NAME 0x1190
588 #define CL_KERNEL_NUM_ARGS 0x1191
589 #define CL_KERNEL_REFERENCE_COUNT 0x1192
590 #define CL_KERNEL_CONTEXT 0x1193
591 #define CL_KERNEL_PROGRAM 0x1194
592 #define CL_KERNEL_ATTRIBUTES 0x1195
593 #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
594 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
595 #define CL_KERNEL_ARG_TYPE_NAME 0x1198
596 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
597 #define CL_KERNEL_ARG_NAME 0x119A
598 #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
599 #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
600 #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
601 #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
602 #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
603 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
604 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
605 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
606 #define CL_KERNEL_ARG_TYPE_NONE 0
607 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
608 #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
609 #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
610 #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
611 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
612 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
613 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
614 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
615 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
617 #define CL_EVENT_COMMAND_QUEUE 0x11D0
618 #define CL_EVENT_COMMAND_TYPE 0x11D1
619 #define CL_EVENT_REFERENCE_COUNT 0x11D2
620 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
621 #define CL_EVENT_CONTEXT 0x11D4
623 #define CL_COMMAND_NDRANGE_KERNEL 0x11F0
624 #define CL_COMMAND_TASK 0x11F1
625 #define CL_COMMAND_NATIVE_KERNEL 0x11F2
626 #define CL_COMMAND_READ_BUFFER 0x11F3
627 #define CL_COMMAND_WRITE_BUFFER 0x11F4
628 #define CL_COMMAND_COPY_BUFFER 0x11F5
629 #define CL_COMMAND_READ_IMAGE 0x11F6
630 #define CL_COMMAND_WRITE_IMAGE 0x11F7
631 #define CL_COMMAND_COPY_IMAGE 0x11F8
632 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
633 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
634 #define CL_COMMAND_MAP_BUFFER 0x11FB
635 #define CL_COMMAND_MAP_IMAGE 0x11FC
636 #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
637 #define CL_COMMAND_MARKER 0x11FE
638 #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
639 #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
640 #define CL_COMMAND_READ_BUFFER_RECT 0x1201
641 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
642 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203
643 #define CL_COMMAND_USER 0x1204
644 #define CL_COMMAND_BARRIER 0x1205
645 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
646 #define CL_COMMAND_FILL_BUFFER 0x1207
647 #define CL_COMMAND_FILL_IMAGE 0x1208
649 #define CL_COMPLETE 0x0
650 #define CL_RUNNING 0x1
651 #define CL_SUBMITTED 0x2
652 #define CL_QUEUED 0x3
653 #define CL_BUFFER_CREATE_TYPE_REGION 0x1220
655 #define CL_PROFILING_COMMAND_QUEUED 0x1280
656 #define CL_PROFILING_COMMAND_SUBMIT 0x1281
657 #define CL_PROFILING_COMMAND_START 0x1282
658 #define CL_PROFILING_COMMAND_END 0x1283
660 #define CL_CALLBACK CV_STDCALL
662 static volatile bool g_haveOpenCL = false;
663 static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
665 #if defined(__APPLE__)
668 static void* initOpenCLAndLoad(const char* funcname)
670 static bool initialized = false;
671 static void* handle = 0;
676 const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
677 oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
678 "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
679 handle = dlopen(oclpath, RTLD_LAZY);
681 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
683 fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath);
685 fprintf(stderr, "Failed to load OpenCL runtime\n");
691 return funcname && handle ? dlsym(handle, funcname) : 0;
694 #elif defined WIN32 || defined _WIN32
696 #ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
697 #define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
700 #if (_WIN32_WINNT >= 0x0602)
701 #include <synchapi.h>
708 static void* initOpenCLAndLoad(const char* funcname)
710 static bool initialized = false;
711 static HMODULE handle = 0;
717 handle = LoadLibraryA("OpenCL.dll");
719 g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
726 return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
729 #elif defined(__linux)
734 static void* initOpenCLAndLoad(const char* funcname)
736 static bool initialized = false;
737 static void* handle = 0;
742 handle = dlopen("libOpenCL.so", RTLD_LAZY);
744 handle = dlopen("libCL.so", RTLD_LAZY);
746 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
752 return funcname ? (void*)dlsym(handle, funcname) : 0;
757 static void* initOpenCLAndLoad(const char*)
765 #define OCL_FUNC(rettype, funcname, argsdecl, args) \
766 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
767 static rettype funcname argsdecl \
769 static funcname##_t funcname##_p = 0; \
770 if( !funcname##_p ) \
772 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
773 if( !funcname##_p ) \
774 return OPENCV_CL_NOT_IMPLEMENTED; \
776 return funcname##_p args; \
780 #define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
781 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
782 static rettype funcname argsdecl \
784 static funcname##_t funcname##_p = 0; \
785 if( !funcname##_p ) \
787 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
788 if( !funcname##_p ) \
791 *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
795 return funcname##_p args; \
798 OCL_FUNC(cl_int, clGetPlatformIDs,
799 (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
800 (num_entries, platforms, num_platforms))
802 OCL_FUNC(cl_int, clGetPlatformInfo,
803 (cl_platform_id platform, cl_platform_info param_name,
804 size_t param_value_size, void * param_value,
805 size_t * param_value_size_ret),
806 (platform, param_name, param_value_size, param_value, param_value_size_ret))
808 OCL_FUNC(cl_int, clGetDeviceInfo,
809 (cl_device_id device,
810 cl_device_info param_name,
811 size_t param_value_size,
813 size_t * param_value_size_ret),
814 (device, param_name, param_value_size, param_value, param_value_size_ret))
817 OCL_FUNC(cl_int, clGetDeviceIDs,
818 (cl_platform_id platform,
819 cl_device_type device_type,
821 cl_device_id * devices,
822 cl_uint * num_devices),
823 (platform, device_type, num_entries, devices, num_devices))
825 OCL_FUNC_P(cl_context, clCreateContext,
826 (const cl_context_properties * properties,
828 const cl_device_id * devices,
829 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
831 cl_int * errcode_ret),
832 (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
834 OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
837 OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
839 OCL_FUNC_P(cl_context, clCreateContextFromType,
840 (const cl_context_properties * properties,
841 cl_device_type device_type,
842 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
844 cl_int * errcode_ret),
845 (properties, device_type, pfn_notify, user_data, errcode_ret))
847 OCL_FUNC(cl_int, clGetContextInfo,
849 cl_context_info param_name,
850 size_t param_value_size,
852 size_t * param_value_size_ret),
853 (context, param_name, param_value_size,
854 param_value, param_value_size_ret))
856 OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
859 cl_command_queue_properties properties,
860 cl_int * errcode_ret),
861 (context, device, properties, errcode_ret))
863 OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
865 OCL_FUNC_P(cl_mem, clCreateBuffer,
870 cl_int * errcode_ret),
871 (context, flags, size, host_ptr, errcode_ret))
874 OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
876 OCL_FUNC(cl_int, clGetCommandQueueInfo,
877 (cl_command_queue command_queue,
878 cl_command_queue_info param_name,
879 size_t param_value_size,
881 size_t * param_value_size_ret),
882 (command_queue, param_name, param_value_size, param_value, param_value_size_ret))
884 OCL_FUNC_P(cl_mem, clCreateSubBuffer,
887 cl_buffer_create_type buffer_create_type,
888 const void * buffer_create_info,
889 cl_int * errcode_ret),
890 (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
893 OCL_FUNC_P(cl_mem, clCreateImage,
896 const cl_image_format * image_format,
897 const cl_image_desc * image_desc,
899 cl_int * errcode_ret),
900 (context, flags, image_format, image_desc, host_ptr, errcode_ret))
902 OCL_FUNC_P(cl_mem, clCreateImage2D,
905 const cl_image_format * image_format,
908 size_t image_row_pitch,
910 cl_int *errcode_ret),
911 (context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret))
913 OCL_FUNC(cl_int, clGetSupportedImageFormats,
916 cl_mem_object_type image_type,
918 cl_image_format * image_formats,
919 cl_uint * num_image_formats),
920 (context, flags, image_type, num_entries, image_formats, num_image_formats))
923 OCL_FUNC(cl_int, clGetMemObjectInfo,
925 cl_mem_info param_name,
926 size_t param_value_size,
928 size_t * param_value_size_ret),
929 (memobj, param_name, param_value_size, param_value, param_value_size_ret))
931 OCL_FUNC(cl_int, clGetImageInfo,
933 cl_image_info param_name,
934 size_t param_value_size,
936 size_t * param_value_size_ret),
937 (image, param_name, param_value_size, param_value, param_value_size_ret))
939 OCL_FUNC(cl_int, clCreateKernelsInProgram,
943 cl_uint * num_kernels_ret),
944 (program, num_kernels, kernels, num_kernels_ret))
946 OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
948 OCL_FUNC(cl_int, clGetKernelArgInfo,
951 cl_kernel_arg_info param_name,
952 size_t param_value_size,
954 size_t * param_value_size_ret),
955 (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
957 OCL_FUNC(cl_int, clEnqueueReadImage,
958 (cl_command_queue command_queue,
960 cl_bool blocking_read,
961 const size_t * origin[3],
962 const size_t * region[3],
966 cl_uint num_events_in_wait_list,
967 const cl_event * event_wait_list,
969 (command_queue, image, blocking_read, origin, region,
970 row_pitch, slice_pitch,
972 num_events_in_wait_list,
976 OCL_FUNC(cl_int, clEnqueueWriteImage,
977 (cl_command_queue command_queue,
979 cl_bool blocking_write,
980 const size_t * origin[3],
981 const size_t * region[3],
982 size_t input_row_pitch,
983 size_t input_slice_pitch,
985 cl_uint num_events_in_wait_list,
986 const cl_event * event_wait_list,
988 (command_queue, image, blocking_write, origin, region, input_row_pitch,
989 input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
991 OCL_FUNC(cl_int, clEnqueueFillImage,
992 (cl_command_queue command_queue,
994 const void * fill_color,
995 const size_t * origin[3],
996 const size_t * region[3],
997 cl_uint num_events_in_wait_list,
998 const cl_event * event_wait_list,
1000 (command_queue, image, fill_color, origin, region,
1001 num_events_in_wait_list, event_wait_list, event))
1003 OCL_FUNC(cl_int, clEnqueueCopyImage,
1004 (cl_command_queue command_queue,
1007 const size_t * src_origin[3],
1008 const size_t * dst_origin[3],
1009 const size_t * region[3],
1010 cl_uint num_events_in_wait_list,
1011 const cl_event * event_wait_list,
1013 (command_queue, src_image, dst_image, src_origin, dst_origin,
1014 region, num_events_in_wait_list, event_wait_list, event))
1016 OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
1017 (cl_command_queue command_queue,
1020 const size_t * src_origin[3],
1021 const size_t * region[3],
1023 cl_uint num_events_in_wait_list,
1024 const cl_event * event_wait_list,
1026 (command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
1027 num_events_in_wait_list, event_wait_list, event))
1030 OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
1031 (cl_command_queue command_queue,
1035 const size_t dst_origin[3],
1036 const size_t region[3],
1037 cl_uint num_events_in_wait_list,
1038 const cl_event * event_wait_list,
1040 (command_queue, src_buffer, dst_image, src_offset, dst_origin,
1041 region, num_events_in_wait_list, event_wait_list, event))
1043 OCL_FUNC(cl_int, clFlush,
1044 (cl_command_queue command_queue),
1048 OCL_FUNC_P(void*, clEnqueueMapImage,
1049 (cl_command_queue command_queue,
1051 cl_bool blocking_map,
1052 cl_map_flags map_flags,
1053 const size_t * origin[3],
1054 const size_t * region[3],
1055 size_t * image_row_pitch,
1056 size_t * image_slice_pitch,
1057 cl_uint num_events_in_wait_list,
1058 const cl_event * event_wait_list,
1060 cl_int * errcode_ret),
1061 (command_queue, image, blocking_map, map_flags, origin, region,
1062 image_row_pitch, image_slice_pitch, num_events_in_wait_list,
1063 event_wait_list, event, errcode_ret))
1067 OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
1069 OCL_FUNC(cl_int, clGetKernelInfo,
1071 cl_kernel_info param_name,
1072 size_t param_value_size,
1074 size_t * param_value_size_ret),
1075 (kernel, param_name, param_value_size, param_value, param_value_size_ret))
1077 OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
1081 OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
1084 OCL_FUNC_P(cl_program, clCreateProgramWithSource,
1085 (cl_context context,
1087 const char ** strings,
1088 const size_t * lengths,
1089 cl_int * errcode_ret),
1090 (context, count, strings, lengths, errcode_ret))
1092 OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
1093 (cl_context context,
1094 cl_uint num_devices,
1095 const cl_device_id * device_list,
1096 const size_t * lengths,
1097 const unsigned char ** binaries,
1098 cl_int * binary_status,
1099 cl_int * errcode_ret),
1100 (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))
1102 OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))
1104 OCL_FUNC(cl_int, clBuildProgram,
1105 (cl_program program,
1106 cl_uint num_devices,
1107 const cl_device_id * device_list,
1108 const char * options,
1109 void (CL_CALLBACK * pfn_notify)(cl_program, void *),
1111 (program, num_devices, device_list, options, pfn_notify, user_data))
1113 OCL_FUNC(cl_int, clGetProgramInfo,
1114 (cl_program program,
1115 cl_program_info param_name,
1116 size_t param_value_size,
1118 size_t * param_value_size_ret),
1119 (program, param_name, param_value_size, param_value, param_value_size_ret))
1121 OCL_FUNC(cl_int, clGetProgramBuildInfo,
1122 (cl_program program,
1123 cl_device_id device,
1124 cl_program_build_info param_name,
1125 size_t param_value_size,
1127 size_t * param_value_size_ret),
1128 (program, device, param_name, param_value_size, param_value, param_value_size_ret))
1130 OCL_FUNC_P(cl_kernel, clCreateKernel,
1131 (cl_program program,
1132 const char * kernel_name,
1133 cl_int * errcode_ret),
1134 (program, kernel_name, errcode_ret))
1136 OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))
1138 OCL_FUNC(cl_int, clSetKernelArg,
1142 const void * arg_value),
1143 (kernel, arg_index, arg_size, arg_value))
1145 OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
1147 cl_device_id device,
1148 cl_kernel_work_group_info param_name,
1149 size_t param_value_size,
1151 size_t * param_value_size_ret),
1152 (kernel, device, param_name, param_value_size, param_value, param_value_size_ret))
1154 OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
1156 OCL_FUNC(cl_int, clEnqueueReadBuffer,
1157 (cl_command_queue command_queue,
1159 cl_bool blocking_read,
1163 cl_uint num_events_in_wait_list,
1164 const cl_event * event_wait_list,
1166 (command_queue, buffer, blocking_read, offset, size, ptr,
1167 num_events_in_wait_list, event_wait_list, event))
1169 OCL_FUNC(cl_int, clEnqueueReadBufferRect,
1170 (cl_command_queue command_queue,
1172 cl_bool blocking_read,
1173 const size_t * buffer_offset,
1174 const size_t * host_offset,
1175 const size_t * region,
1176 size_t buffer_row_pitch,
1177 size_t buffer_slice_pitch,
1178 size_t host_row_pitch,
1179 size_t host_slice_pitch,
1181 cl_uint num_events_in_wait_list,
1182 const cl_event * event_wait_list,
1184 (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch,
1185 buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
1186 event_wait_list, event))
1188 OCL_FUNC(cl_int, clEnqueueWriteBuffer,
1189 (cl_command_queue command_queue,
1191 cl_bool blocking_write,
1195 cl_uint num_events_in_wait_list,
1196 const cl_event * event_wait_list,
1198 (command_queue, buffer, blocking_write, offset, size, ptr,
1199 num_events_in_wait_list, event_wait_list, event))
1201 OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
1202 (cl_command_queue command_queue,
1204 cl_bool blocking_write,
1205 const size_t * buffer_offset,
1206 const size_t * host_offset,
1207 const size_t * region,
1208 size_t buffer_row_pitch,
1209 size_t buffer_slice_pitch,
1210 size_t host_row_pitch,
1211 size_t host_slice_pitch,
1213 cl_uint num_events_in_wait_list,
1214 const cl_event * event_wait_list,
1216 (command_queue, buffer, blocking_write, buffer_offset, host_offset,
1217 region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch,
1218 host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
1220 /*OCL_FUNC(cl_int, clEnqueueFillBuffer,
1221 (cl_command_queue command_queue,
1223 const void * pattern,
1224 size_t pattern_size,
1227 cl_uint num_events_in_wait_list,
1228 const cl_event * event_wait_list,
1230 (command_queue, buffer, pattern, pattern_size, offset, size,
1231 num_events_in_wait_list, event_wait_list, event))*/
1233 OCL_FUNC(cl_int, clEnqueueCopyBuffer,
1234 (cl_command_queue command_queue,
1240 cl_uint num_events_in_wait_list,
1241 const cl_event * event_wait_list,
1243 (command_queue, src_buffer, dst_buffer, src_offset, dst_offset,
1244 size, num_events_in_wait_list, event_wait_list, event))
1246 OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
1247 (cl_command_queue command_queue,
1250 const size_t * src_origin,
1251 const size_t * dst_origin,
1252 const size_t * region,
1253 size_t src_row_pitch,
1254 size_t src_slice_pitch,
1255 size_t dst_row_pitch,
1256 size_t dst_slice_pitch,
1257 cl_uint num_events_in_wait_list,
1258 const cl_event * event_wait_list,
1260 (command_queue, src_buffer, dst_buffer, src_origin, dst_origin,
1261 region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
1262 num_events_in_wait_list, event_wait_list, event))
1264 OCL_FUNC_P(void*, clEnqueueMapBuffer,
1265 (cl_command_queue command_queue,
1267 cl_bool blocking_map,
1268 cl_map_flags map_flags,
1271 cl_uint num_events_in_wait_list,
1272 const cl_event * event_wait_list,
1274 cl_int * errcode_ret),
1275 (command_queue, buffer, blocking_map, map_flags, offset, size,
1276 num_events_in_wait_list, event_wait_list, event, errcode_ret))
1278 OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
1279 (cl_command_queue command_queue,
1282 cl_uint num_events_in_wait_list,
1283 const cl_event * event_wait_list,
1285 (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))
1287 OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
1288 (cl_command_queue command_queue,
1291 const size_t * global_work_offset,
1292 const size_t * global_work_size,
1293 const size_t * local_work_size,
1294 cl_uint num_events_in_wait_list,
1295 const cl_event * event_wait_list,
1297 (command_queue, kernel, work_dim, global_work_offset, global_work_size,
1298 local_work_size, num_events_in_wait_list, event_wait_list, event))
1300 OCL_FUNC(cl_int, clEnqueueTask,
1301 (cl_command_queue command_queue,
1303 cl_uint num_events_in_wait_list,
1304 const cl_event * event_wait_list,
1306 (command_queue, kernel, num_events_in_wait_list, event_wait_list, event))
1308 OCL_FUNC(cl_int, clSetEventCallback,
1310 cl_int command_exec_callback_type ,
1311 void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data),
1313 (event, command_exec_callback_type, pfn_event_notify, user_data))
1315 OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
1321 #ifndef CL_VERSION_1_2
1322 #define CL_VERSION_1_2
1328 #define CV_OclDbgAssert CV_DbgAssert
1330 static bool isRaiseError()
1332 static bool initialized = false;
1333 static bool value = false;
1336 value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false);
1341 #define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
1344 namespace cv { namespace ocl {
1348 UMat2D(const UMat& m)
1350 offset = (int)m.offset;
1363 UMat3D(const UMat& m)
1365 offset = (int)m.offset;
1366 step = (int)m.step.p[1];
1367 slicestep = (int)m.step.p[0];
1368 slices = (int)m.size.p[0];
1380 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
1381 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
1383 static uint64 table[256];
1384 static bool initialized = false;
1388 for( int i = 0; i < 256; i++ )
1391 for( int j = 0; j < 8; j++ )
1392 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
1399 for( size_t idx = 0; idx < size; idx++ )
1400 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
1407 typedef uint64 part;
1408 HashKey(part _a, part _b) : a(_a), b(_b) {}
1412 inline bool operator == (const HashKey& h1, const HashKey& h2)
1414 return h1.a == h2.a && h1.b == h2.b;
1417 inline bool operator < (const HashKey& h1, const HashKey& h2)
1419 return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
1426 static bool g_isOpenCLInitialized = false;
1427 static bool g_isOpenCLAvailable = false;
1429 if (!g_isOpenCLInitialized)
1434 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1438 g_isOpenCLAvailable = false;
1440 g_isOpenCLInitialized = true;
1442 return g_isOpenCLAvailable;
1450 CoreTLSData* data = coreTlsData.get();
1451 if( data->useOpenCL < 0 )
1455 data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() != NULL;
1459 data->useOpenCL = 0;
1462 return data->useOpenCL > 0;
1465 void setUseOpenCL(bool flag)
1469 CoreTLSData* data = coreTlsData.get();
1470 data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0;
1474 #ifdef HAVE_CLAMDBLAS
1479 static AmdBlasHelper & getInstance()
1481 static AmdBlasHelper amdBlas;
1485 bool isAvailable() const
1487 return g_isAmdBlasAvailable;
1494 clAmdBlasTeardown();
1502 if (!g_isAmdBlasInitialized)
1506 if (!g_isAmdBlasInitialized && haveOpenCL())
1510 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
1514 g_isAmdBlasAvailable = false;
1518 g_isAmdBlasAvailable = false;
1520 g_isAmdBlasInitialized = true;
1526 static bool g_isAmdBlasInitialized;
1527 static bool g_isAmdBlasAvailable;
1530 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1531 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1532 Mutex AmdBlasHelper::m;
1536 return AmdBlasHelper::getInstance().isAvailable();
1548 #ifdef HAVE_CLAMDFFT
1553 static AmdFftHelper & getInstance()
1555 static AmdFftHelper amdFft;
1559 bool isAvailable() const
1561 return g_isAmdFftAvailable;
1568 // clAmdFftTeardown();
1576 if (!g_isAmdFftInitialized)
1580 if (!g_isAmdFftInitialized && haveOpenCL())
1584 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1585 g_isAmdFftAvailable = true;
1587 catch (const Exception &)
1589 g_isAmdFftAvailable = false;
1593 g_isAmdFftAvailable = false;
1595 g_isAmdFftInitialized = true;
1600 static clAmdFftSetupData setupData;
1602 static bool g_isAmdFftInitialized;
1603 static bool g_isAmdFftAvailable;
1606 clAmdFftSetupData AmdFftHelper::setupData;
1607 bool AmdFftHelper::g_isAmdFftAvailable = false;
1608 bool AmdFftHelper::g_isAmdFftInitialized = false;
1609 Mutex AmdFftHelper::m;
1613 return AmdFftHelper::getInstance().isAvailable();
1627 Queue::getDefault().finish();
1630 #define IMPLEMENT_REFCOUNTABLE() \
1631 void addref() { CV_XADD(&refcount, 1); } \
1632 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
1635 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1637 struct Platform::Impl
1643 initialized = false;
1652 //cl_uint num_entries
1654 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1660 CV_OclDbgAssert(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len) == CL_SUCCESS);
1662 vendor = String(buf);
1669 IMPLEMENT_REFCOUNTABLE();
1671 cl_platform_id handle;
1676 Platform::Platform()
1681 Platform::~Platform()
1687 Platform::Platform(const Platform& pl)
1694 Platform& Platform::operator = (const Platform& pl)
1696 Impl* newp = (Impl*)pl.p;
1705 void* Platform::ptr() const
1707 return p ? p->handle : 0;
1710 Platform& Platform::getDefault()
1721 /////////////////////////////////////// Device ////////////////////////////////////////////
1723 // deviceVersion has format
1724 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1726 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1727 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1728 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1731 if (10 >= deviceVersion.length())
1733 const char *pstr = deviceVersion.c_str();
1734 if (0 != strncmp(pstr, "OpenCL ", 7))
1736 size_t ppos = deviceVersion.find('.', 7);
1737 if (String::npos == ppos)
1739 String temp = deviceVersion.substr(7, ppos - 7);
1740 major = atoi(temp.c_str());
1741 temp = deviceVersion.substr(ppos + 1);
1742 minor = atoi(temp.c_str());
1749 handle = (cl_device_id)d;
1752 name_ = getStrProp(CL_DEVICE_NAME);
1753 version_ = getStrProp(CL_DEVICE_VERSION);
1754 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1755 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1756 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1757 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1758 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1759 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1761 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1762 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1764 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1765 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1766 vendorName_ == "AMD")
1767 vendorID_ = VENDOR_AMD;
1768 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1769 vendorID_ = VENDOR_INTEL;
1770 else if (vendorName_ == "NVIDIA Corporation")
1771 vendorID_ = VENDOR_NVIDIA;
1773 vendorID_ = UNKNOWN_VENDOR;
1776 template<typename _TpCL, typename _TpOut>
1777 _TpOut getProp(cl_device_info prop) const
1782 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1783 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1786 bool getBoolProp(cl_device_info prop) const
1788 cl_bool temp = CL_FALSE;
1791 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1792 sz == sizeof(temp) ? temp != 0 : false;
1795 String getStrProp(cl_device_info prop) const
1799 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1800 sz < sizeof(buf) ? String(buf) : String();
1803 IMPLEMENT_REFCOUNTABLE();
1804 cl_device_id handle;
1808 int doubleFPConfig_;
1809 bool hostUnifiedMemory_;
1810 int maxComputeUnits_;
1811 size_t maxWorkGroupSize_;
1813 int deviceVersionMajor_;
1814 int deviceVersionMinor_;
1815 String driverVersion_;
1826 Device::Device(void* d)
1832 Device::Device(const Device& d)
1839 Device& Device::operator = (const Device& d)
1841 Impl* newp = (Impl*)d.p;
1856 void Device::set(void* d)
1863 void* Device::ptr() const
1865 return p ? p->handle : 0;
1868 String Device::name() const
1869 { return p ? p->name_ : String(); }
1871 String Device::extensions() const
1872 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1874 String Device::version() const
1875 { return p ? p->version_ : String(); }
1877 String Device::vendorName() const
1878 { return p ? p->vendorName_ : String(); }
1880 int Device::vendorID() const
1881 { return p ? p->vendorID_ : 0; }
1883 String Device::OpenCL_C_Version() const
1884 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1886 String Device::OpenCLVersion() const
1887 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1889 int Device::deviceVersionMajor() const
1890 { return p ? p->deviceVersionMajor_ : 0; }
1892 int Device::deviceVersionMinor() const
1893 { return p ? p->deviceVersionMinor_ : 0; }
1895 String Device::driverVersion() const
1896 { return p ? p->driverVersion_ : String(); }
1898 int Device::type() const
1899 { return p ? p->type_ : 0; }
1901 int Device::addressBits() const
1902 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
1904 bool Device::available() const
1905 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1907 bool Device::compilerAvailable() const
1908 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1910 bool Device::linkerAvailable() const
1911 #ifdef CL_VERSION_1_2
1912 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1914 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1917 int Device::doubleFPConfig() const
1918 { return p ? p->doubleFPConfig_ : 0; }
1920 int Device::singleFPConfig() const
1921 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1923 int Device::halfFPConfig() const
1924 #ifdef CL_VERSION_1_2
1925 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1927 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1930 bool Device::endianLittle() const
1931 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1933 bool Device::errorCorrectionSupport() const
1934 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1936 int Device::executionCapabilities() const
1937 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1939 size_t Device::globalMemCacheSize() const
1940 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1942 int Device::globalMemCacheType() const
1943 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1945 int Device::globalMemCacheLineSize() const
1946 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1948 size_t Device::globalMemSize() const
1949 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1951 size_t Device::localMemSize() const
1952 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1954 int Device::localMemType() const
1955 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1957 bool Device::hostUnifiedMemory() const
1958 { return p ? p->hostUnifiedMemory_ : false; }
1960 bool Device::imageSupport() const
1961 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1963 bool Device::imageFromBufferSupport() const
1968 size_t pos = p->getStrProp(CL_DEVICE_EXTENSIONS).find("cl_khr_image2d_from_buffer");
1969 if (pos != String::npos)
1977 uint Device::imagePitchAlignment() const
1979 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1980 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1986 uint Device::imageBaseAddressAlignment() const
1988 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1989 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1995 size_t Device::image2DMaxWidth() const
1996 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1998 size_t Device::image2DMaxHeight() const
1999 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
2001 size_t Device::image3DMaxWidth() const
2002 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
2004 size_t Device::image3DMaxHeight() const
2005 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
2007 size_t Device::image3DMaxDepth() const
2008 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
2010 size_t Device::imageMaxBufferSize() const
2011 #ifdef CL_VERSION_1_2
2012 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
2014 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2017 size_t Device::imageMaxArraySize() const
2018 #ifdef CL_VERSION_1_2
2019 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
2021 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2024 int Device::maxClockFrequency() const
2025 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
2027 int Device::maxComputeUnits() const
2028 { return p ? p->maxComputeUnits_ : 0; }
2030 int Device::maxConstantArgs() const
2031 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
2033 size_t Device::maxConstantBufferSize() const
2034 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
2036 size_t Device::maxMemAllocSize() const
2037 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
2039 size_t Device::maxParameterSize() const
2040 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
2042 int Device::maxReadImageArgs() const
2043 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
2045 int Device::maxWriteImageArgs() const
2046 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
2048 int Device::maxSamplers() const
2049 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
2051 size_t Device::maxWorkGroupSize() const
2052 { return p ? p->maxWorkGroupSize_ : 0; }
2054 int Device::maxWorkItemDims() const
2055 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
2057 void Device::maxWorkItemSizes(size_t* sizes) const
2061 const int MAX_DIMS = 32;
2063 CV_OclDbgAssert(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
2064 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz) == CL_SUCCESS);
2068 int Device::memBaseAddrAlign() const
2069 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
2071 int Device::nativeVectorWidthChar() const
2072 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
2074 int Device::nativeVectorWidthShort() const
2075 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
2077 int Device::nativeVectorWidthInt() const
2078 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
2080 int Device::nativeVectorWidthLong() const
2081 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
2083 int Device::nativeVectorWidthFloat() const
2084 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
2086 int Device::nativeVectorWidthDouble() const
2087 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
2089 int Device::nativeVectorWidthHalf() const
2090 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
2092 int Device::preferredVectorWidthChar() const
2093 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
2095 int Device::preferredVectorWidthShort() const
2096 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
2098 int Device::preferredVectorWidthInt() const
2099 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
2101 int Device::preferredVectorWidthLong() const
2102 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
2104 int Device::preferredVectorWidthFloat() const
2105 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
2107 int Device::preferredVectorWidthDouble() const
2108 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
2110 int Device::preferredVectorWidthHalf() const
2111 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
2113 size_t Device::printfBufferSize() const
2114 #ifdef CL_VERSION_1_2
2115 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
2117 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2121 size_t Device::profilingTimerResolution() const
2122 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
2124 const Device& Device::getDefault()
2126 const Context& ctx = Context::getDefault();
2127 int idx = coreTlsData.get()->device;
2128 return ctx.device(idx);
2131 ////////////////////////////////////// Context ///////////////////////////////////////////////////
2133 template <typename Functor, typename ObjectType>
2134 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2137 cl_int err = f(obj, name, 0, NULL, &required);
2138 if (err != CL_SUCCESS)
2144 AutoBuffer<char> buf(required + 1);
2145 char* ptr = (char*)buf; // cleanup is not needed
2146 err = f(obj, name, required, ptr, NULL);
2147 if (err != CL_SUCCESS)
2155 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
2160 std::istringstream ss(s);
2164 std::getline(ss, item, delim);
2165 elems.push_back(item);
2169 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2171 // Sample: AMD:GPU:Tahiti
2172 // Sample: :GPU|CPU: = '' = ':' = '::'
2173 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2174 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2176 std::vector<std::string> parts;
2177 split(configurationStr, ':', parts);
2178 if (parts.size() > 3)
2180 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
2183 if (parts.size() > 2)
2184 deviceNameOrID = parts[2];
2185 if (parts.size() > 1)
2187 split(parts[1], '|', deviceTypes);
2189 if (parts.size() > 0)
2191 platform = parts[0];
2197 static cl_device_id selectOpenCLDevice()
2202 static cl_device_id selectOpenCLDevice()
2204 std::string platform, deviceName;
2205 std::vector<std::string> deviceTypes;
2207 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
2208 if (configuration && !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
2213 if (deviceName.length() == 1)
2214 // We limit ID range to 0..9, because we want to write:
2215 // - '2500' to mean i5-2500
2216 // - '8350' to mean AMD FX-8350
2217 // - '650' to mean GeForce 650
2218 // To extend ID range change condition to '> 0'
2221 for (size_t i = 0; i < deviceName.length(); i++)
2223 if (!isdigit(deviceName[i]))
2231 deviceID = atoi(deviceName.c_str());
2237 std::vector<cl_platform_id> platforms;
2239 cl_uint numPlatforms = 0;
2240 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
2242 if (numPlatforms == 0)
2244 platforms.resize((size_t)numPlatforms);
2245 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
2246 platforms.resize(numPlatforms);
2249 int selectedPlatform = -1;
2250 if (platform.length() > 0)
2252 for (size_t i = 0; i < platforms.size(); i++)
2255 CV_OclDbgAssert(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name) == CL_SUCCESS);
2256 if (name.find(platform) != std::string::npos)
2258 selectedPlatform = (int)i;
2262 if (selectedPlatform == -1)
2264 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
2268 if (deviceTypes.size() == 0)
2272 deviceTypes.push_back("GPU");
2274 deviceTypes.push_back("CPU");
2277 deviceTypes.push_back("ALL");
2279 for (size_t t = 0; t < deviceTypes.size(); t++)
2282 std::string tempStrDeviceType = deviceTypes[t];
2283 std::transform( tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), tolower );
2285 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2286 deviceType = Device::TYPE_GPU;
2287 else if (tempStrDeviceType == "cpu")
2288 deviceType = Device::TYPE_CPU;
2289 else if (tempStrDeviceType == "accelerator")
2290 deviceType = Device::TYPE_ACCELERATOR;
2291 else if (tempStrDeviceType == "all")
2292 deviceType = Device::TYPE_ALL;
2295 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
2299 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2300 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2301 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2305 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2306 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2309 size_t base = devices.size();
2310 devices.resize(base + count);
2311 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2312 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2315 for (size_t i = (isID ? deviceID : 0);
2316 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2320 CV_OclDbgAssert(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name) == CL_SUCCESS);
2321 cl_bool useGPU = true;
2322 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2324 cl_bool isIGPU = CL_FALSE;
2325 clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL);
2326 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2328 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2330 // TODO check for OpenCL 1.1
2337 std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
2338 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2339 << " Device types: ";
2340 for (size_t t = 0; t < deviceTypes.size(); t++)
2341 std::cerr << deviceTypes[t] << " ";
2343 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
2344 CV_Error(CL_INVALID_DEVICE, "Requested OpenCL device is not found");
2349 struct Context::Impl
2359 CV_Assert(handle == NULL);
2361 cl_device_id d = selectOpenCLDevice();
2366 cl_platform_id pl = NULL;
2367 CV_OclDbgAssert(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL) == CL_SUCCESS);
2369 cl_context_properties prop[] =
2371 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2375 // !!! in the current implementation force the number of devices to 1 !!!
2379 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2381 bool ok = handle != 0 && status == CL_SUCCESS;
2397 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
2398 cl_context_properties prop[] =
2400 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2404 cl_uint i, nd0 = 0, nd = 0;
2405 int dtype = dtype0 & 15;
2406 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, 0, 0, &nd0 ) == CL_SUCCESS);
2408 AutoBuffer<void*> dlistbuf(nd0*2+1);
2409 cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
2410 cl_device_id* dlist_new = dlist + nd0;
2411 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
2414 for(i = 0; i < nd0; i++)
2417 if( !d.available() || !d.compilerAvailable() )
2419 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
2421 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
2423 String name = d.name();
2424 if( nd != 0 && name != name0 )
2427 dlist_new[nd++] = dlist[i];
2433 // !!! in the current implementation force the number of devices to 1 !!!
2436 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
2437 bool ok = handle != 0 && retval == CL_SUCCESS;
2441 for( i = 0; i < nd; i++ )
2442 devices[i].set(dlist_new[i]);
2450 clReleaseContext(handle);
2456 Program getProg(const ProgramSource& src,
2457 const String& buildflags, String& errmsg)
2459 String prefix = Program::getPrefix(buildflags);
2460 HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
2461 phash_t::iterator it = phash.find(k);
2462 if( it != phash.end() )
2464 //String filename = format("%08x%08x_%08x%08x.clb2",
2465 Program prog(src, buildflags, errmsg);
2467 phash.insert(std::pair<HashKey,Program>(k, prog));
2471 IMPLEMENT_REFCOUNTABLE();
2474 std::vector<Device> devices;
2476 typedef ProgramSource::hash_t hash_t;
2480 HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
2481 bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
2482 bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
2483 bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
2486 typedef std::map<HashKey, Program> phash_t;
2496 Context::Context(int dtype)
2502 bool Context::create()
2517 bool Context::create(int dtype0)
2523 p = new Impl(dtype0);
2541 Context::Context(const Context& c)
2548 Context& Context::operator = (const Context& c)
2550 Impl* newp = (Impl*)c.p;
2559 void* Context::ptr() const
2561 return p == NULL ? NULL : p->handle;
2564 size_t Context::ndevices() const
2566 return p ? p->devices.size() : 0;
2569 const Device& Context::device(size_t idx) const
2571 static Device dummy;
2572 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2575 Context& Context::getDefault(bool initialize)
2577 static Context* ctx = new Context();
2578 if(!ctx->p && haveOpenCL())
2581 ctx->p = new Impl();
2584 // do not create new Context right away.
2585 // First, try to retrieve existing context of the same type.
2586 // In its turn, Platform::getContext() may call Context::create()
2587 // if there is no such context.
2588 if (ctx->p->handle == NULL)
2589 ctx->p->setDefault();
2596 Program Context::getProg(const ProgramSource& prog,
2597 const String& buildopts, String& errmsg)
2599 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2602 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
2604 cl_context context = (cl_context)_context;
2605 cl_device_id device = (cl_device_id)_device;
2607 // cleanup old context
2608 Context::Impl * impl = ctx.p;
2611 CV_OclDbgAssert(clReleaseContext(impl->handle) == CL_SUCCESS);
2613 impl->devices.clear();
2615 impl->handle = context;
2616 impl->devices.resize(1);
2617 impl->devices[0].set(device);
2619 Platform& p = Platform::getDefault();
2620 Platform::Impl* pImpl = p.p;
2621 pImpl->handle = (cl_platform_id)platform;
2624 /////////////////////////////////////////// Queue /////////////////////////////////////////////
2628 Impl(const Context& c, const Device& d)
2631 const Context* pc = &c;
2632 cl_context ch = (cl_context)pc->ptr();
2635 pc = &Context::getDefault();
2636 ch = (cl_context)pc->ptr();
2638 cl_device_id dh = (cl_device_id)d.ptr();
2640 dh = (cl_device_id)pc->device(0).ptr();
2642 handle = clCreateCommandQueue(ch, dh, 0, &retval);
2643 CV_OclDbgAssert(retval == CL_SUCCESS);
2649 if (!cv::__termination)
2655 clReleaseCommandQueue(handle);
2661 IMPLEMENT_REFCOUNTABLE();
2663 cl_command_queue handle;
2671 Queue::Queue(const Context& c, const Device& d)
2677 Queue::Queue(const Queue& q)
2684 Queue& Queue::operator = (const Queue& q)
2686 Impl* newp = (Impl*)q.p;
2701 bool Queue::create(const Context& c, const Device& d)
2706 return p->handle != 0;
2709 void Queue::finish()
2713 CV_OclDbgAssert(clFinish(p->handle) == CL_SUCCESS);
2717 void* Queue::ptr() const
2719 return p ? p->handle : 0;
2722 Queue& Queue::getDefault()
2724 Queue& q = coreTlsData.get()->oclQueue;
2725 if( !q.p && haveOpenCL() )
2726 q.create(Context::getDefault());
2730 static cl_command_queue getQueue(const Queue& q)
2732 cl_command_queue qq = (cl_command_queue)q.ptr();
2734 qq = (cl_command_queue)Queue::getDefault().ptr();
2738 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
2740 KernelArg::KernelArg()
2741 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
2745 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
2746 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
2750 KernelArg KernelArg::Constant(const Mat& m)
2752 CV_Assert(m.isContinuous());
2753 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
2756 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
2760 Impl(const char* kname, const Program& prog) :
2761 refcount(1), e(0), nu(0)
2763 cl_program ph = (cl_program)prog.ptr();
2766 clCreateKernel(ph, kname, &retval) : 0;
2767 CV_OclDbgAssert(retval == CL_SUCCESS);
2768 for( int i = 0; i < MAX_ARRS; i++ )
2770 haveTempDstUMats = false;
2775 for( int i = 0; i < MAX_ARRS; i++ )
2778 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
2779 u[i]->currAllocator->deallocate(u[i]);
2783 haveTempDstUMats = false;
2786 void addUMat(const UMat& m, bool dst)
2788 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
2790 CV_XADD(&m.u->urefcount, 1);
2792 if(dst && m.u->tempUMat())
2793 haveTempDstUMats = true;
2796 void addImage(const Image2D& image)
2798 images.push_back(image);
2805 if(e) { clReleaseEvent(e); e = 0; }
2812 clReleaseKernel(handle);
2815 IMPLEMENT_REFCOUNTABLE();
2819 enum { MAX_ARRS = 16 };
2820 UMatData* u[MAX_ARRS];
2822 std::list<Image2D> images;
2823 bool haveTempDstUMats;
2830 static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
2832 ((cv::ocl::Kernel::Impl*)p)->finit();
2837 namespace cv { namespace ocl {
2844 Kernel::Kernel(const char* kname, const Program& prog)
2847 create(kname, prog);
2850 Kernel::Kernel(const char* kname, const ProgramSource& src,
2851 const String& buildopts, String* errmsg)
2854 create(kname, src, buildopts, errmsg);
2857 Kernel::Kernel(const Kernel& k)
2864 Kernel& Kernel::operator = (const Kernel& k)
2866 Impl* newp = (Impl*)k.p;
2881 bool Kernel::create(const char* kname, const Program& prog)
2885 p = new Impl(kname, prog);
2894 bool Kernel::create(const char* kname, const ProgramSource& src,
2895 const String& buildopts, String* errmsg)
2903 if( !errmsg ) errmsg = &tempmsg;
2904 const Program& prog = Context::getDefault().getProg(src, buildopts, *errmsg);
2905 return create(kname, prog);
2908 void* Kernel::ptr() const
2910 return p ? p->handle : 0;
2913 bool Kernel::empty() const
2918 int Kernel::set(int i, const void* value, size_t sz)
2920 if (!p || !p->handle)
2927 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
2928 CV_OclDbgAssert(retval == CL_SUCCESS);
2929 if (retval != CL_SUCCESS)
2934 int Kernel::set(int i, const Image2D& image2D)
2936 p->addImage(image2D);
2937 cl_mem h = (cl_mem)image2D.ptr();
2938 return set(i, &h, sizeof(h));
2941 int Kernel::set(int i, const UMat& m)
2943 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
2946 int Kernel::set(int i, const KernelArg& arg)
2948 if( !p || !p->handle )
2956 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
2957 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
2958 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
2959 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
2969 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h) == CL_SUCCESS);
2970 else if( arg.m->dims <= 2 )
2973 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
2974 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS);
2975 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS);
2978 if( !(arg.flags & KernelArg::NO_SIZE) )
2980 int cols = u2d.cols*arg.wscale/arg.iwscale;
2981 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows) == CL_SUCCESS);
2982 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols) == CL_SUCCESS);
2989 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
2990 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS);
2991 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS);
2992 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS);
2994 if( !(arg.flags & KernelArg::NO_SIZE) )
2996 int cols = u3d.cols*arg.wscale/arg.iwscale;
2997 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows) == CL_SUCCESS);
2998 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows) == CL_SUCCESS);
2999 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols) == CL_SUCCESS);
3003 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
3006 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj) == CL_SUCCESS);
3011 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3012 bool sync, const Queue& q)
3014 if(!p || !p->handle || p->e != 0)
3017 cl_command_queue qq = getQueue(q);
3018 size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1};
3020 CV_Assert(_globalsize != 0);
3021 for (int i = 0; i < dims; i++)
3023 size_t val = _localsize ? _localsize[i] :
3024 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3025 CV_Assert( val > 0 );
3026 total *= _globalsize[i];
3027 globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
3031 if( p->haveTempDstUMats )
3033 cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
3034 offset, globalsize, _localsize, 0, 0,
3036 if( sync || retval != CL_SUCCESS )
3038 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3044 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3046 return retval == CL_SUCCESS;
3049 bool Kernel::runTask(bool sync, const Queue& q)
3051 if(!p || !p->handle || p->e != 0)
3054 cl_command_queue qq = getQueue(q);
3055 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
3056 if( sync || retval != CL_SUCCESS )
3058 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3064 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3066 return retval == CL_SUCCESS;
3070 size_t Kernel::workGroupSize() const
3072 if(!p || !p->handle)
3074 size_t val = 0, retsz = 0;
3075 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3076 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
3077 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3080 size_t Kernel::preferedWorkGroupSizeMultiple() const
3082 if(!p || !p->handle)
3084 size_t val = 0, retsz = 0;
3085 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3086 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
3087 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3090 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3092 if(!p || !p->handle || !wsz)
3095 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3096 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
3097 sizeof(wsz[0]*3), wsz, &retsz) == CL_SUCCESS;
3100 size_t Kernel::localMemSize() const
3102 if(!p || !p->handle)
3106 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3107 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
3108 sizeof(val), &val, &retsz) == CL_SUCCESS ? (size_t)val : 0;
3111 /////////////////////////////////////////// Program /////////////////////////////////////////////
3113 struct Program::Impl
3115 Impl(const ProgramSource& _src,
3116 const String& _buildflags, String& errmsg)
3119 const Context& ctx = Context::getDefault();
3121 buildflags = _buildflags;
3122 const String& srcstr = src.source();
3123 const char* srcptr = srcstr.c_str();
3124 size_t srclen = srcstr.size();
3127 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
3128 if( handle && retval == CL_SUCCESS )
3130 int i, n = (int)ctx.ndevices();
3131 AutoBuffer<void*> deviceListBuf(n+1);
3132 void** deviceList = deviceListBuf;
3133 for( i = 0; i < n; i++ )
3134 deviceList[i] = ctx.device(i).ptr();
3136 Device device = Device::getDefault();
3138 buildflags += " -D AMD_DEVICE";
3139 else if (device.isIntel())
3140 buildflags += " -D INTEL_DEVICE";
3142 retval = clBuildProgram(handle, n,
3143 (const cl_device_id*)deviceList,
3144 buildflags.c_str(), 0, 0);
3145 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
3146 if( retval != CL_SUCCESS )
3150 cl_int buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3151 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
3152 if (buildInfo_retval == CL_SUCCESS && retsz > 1)
3154 AutoBuffer<char> bufbuf(retsz + 16);
3156 buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3157 CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
3158 if (buildInfo_retval == CL_SUCCESS)
3160 // TODO It is useful to see kernel name & program file name also
3161 errmsg = String(buf);
3162 printf("OpenCL program build log: %s\n%s\n", buildflags.c_str(), errmsg.c_str());
3166 if (retval != CL_SUCCESS && handle)
3168 clReleaseProgram(handle);
3175 Impl(const String& _buf, const String& _buildflags)
3179 buildflags = _buildflags;
3182 String prefix0 = Program::getPrefix(buildflags);
3183 const Context& ctx = Context::getDefault();
3184 const Device& dev = Device::getDefault();
3185 const char* pos0 = _buf.c_str();
3186 const char* pos1 = strchr(pos0, '\n');
3189 const char* pos2 = strchr(pos1+1, '\n');
3192 const char* pos3 = strchr(pos2+1, '\n');
3195 size_t prefixlen = (pos3 - pos0)+1;
3196 String prefix(pos0, prefixlen);
3197 if( prefix != prefix0 )
3199 const uchar* bin = (uchar*)(pos3+1);
3200 void* devid = dev.ptr();
3201 size_t codelen = _buf.length() - prefixlen;
3202 cl_int binstatus = 0, retval = 0;
3203 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
3204 &codelen, &bin, &binstatus, &retval);
3205 CV_OclDbgAssert(retval == CL_SUCCESS);
3212 size_t progsz = 0, retsz = 0;
3213 String prefix = Program::getPrefix(buildflags);
3214 size_t prefixlen = prefix.length();
3215 if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) != CL_SUCCESS)
3217 AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
3218 uchar* buf = bufbuf;
3219 memcpy(buf, prefix.c_str(), prefixlen);
3221 if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) != CL_SUCCESS)
3223 buf[progsz] = (uchar)'\0';
3224 return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
3232 if (!cv::__termination)
3235 clReleaseProgram(handle);
3241 IMPLEMENT_REFCOUNTABLE();
3249 Program::Program() { p = 0; }
3251 Program::Program(const ProgramSource& src,
3252 const String& buildflags, String& errmsg)
3255 create(src, buildflags, errmsg);
3258 Program::Program(const Program& prog)
3265 Program& Program::operator = (const Program& prog)
3267 Impl* newp = (Impl*)prog.p;
3282 bool Program::create(const ProgramSource& src,
3283 const String& buildflags, String& errmsg)
3287 p = new Impl(src, buildflags, errmsg);
3296 const ProgramSource& Program::source() const
3298 static ProgramSource dummy;
3299 return p ? p->src : dummy;
3302 void* Program::ptr() const
3304 return p ? p->handle : 0;
3307 bool Program::read(const String& bin, const String& buildflags)
3311 p = new Impl(bin, buildflags);
3312 return p->handle != 0;
3315 bool Program::write(String& bin) const
3320 return !bin.empty();
3323 String Program::getPrefix() const
3327 return getPrefix(p->buildflags);
3330 String Program::getPrefix(const String& buildflags)
3332 const Context& ctx = Context::getDefault();
3333 const Device& dev = ctx.device(0);
3334 return format("name=%s\ndriver=%s\nbuildflags=%s\n",
3335 dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
3338 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3340 struct ProgramSource::Impl
3342 Impl(const char* _src)
3346 Impl(const String& _src)
3350 void init(const String& _src)
3354 h = crc64((uchar*)src.c_str(), src.size());
3357 IMPLEMENT_REFCOUNTABLE();
3359 ProgramSource::hash_t h;
3363 ProgramSource::ProgramSource()
3368 ProgramSource::ProgramSource(const char* prog)
3373 ProgramSource::ProgramSource(const String& prog)
3378 ProgramSource::~ProgramSource()
3384 ProgramSource::ProgramSource(const ProgramSource& prog)
3391 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
3393 Impl* newp = (Impl*)prog.p;
3402 const String& ProgramSource::source() const
3404 static String dummy;
3405 return p ? p->src : dummy;
3408 ProgramSource::hash_t ProgramSource::hash() const
3410 return p ? p->h : 0;
3413 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
3415 class OpenCLBufferPool
3418 ~OpenCLBufferPool() { }
3420 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;
3421 virtual void release(cl_mem handle, size_t capacity) = 0;
3424 class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
3435 size_t currentReservedSize;
3436 size_t maxReservedSize;
3438 std::list<BufferEntry> reservedEntries_; // LRU order
3441 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
3443 if (reservedEntries_.empty())
3445 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3446 std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
3447 BufferEntry result = {NULL, 0};
3448 size_t minDiff = (size_t)(-1);
3449 for (; i != reservedEntries_.end(); ++i)
3451 BufferEntry& e = *i;
3452 if (e.capacity_ >= size)
3454 size_t diff = e.capacity_ - size;
3455 if (diff < size / 8 && (result_pos == reservedEntries_.end() || diff < minDiff))
3465 if (result_pos != reservedEntries_.end())
3467 //CV_DbgAssert(result == *result_pos);
3468 reservedEntries_.erase(result_pos);
3470 currentReservedSize -= entry.capacity_;
3477 void _checkSizeOfReservedEntries()
3479 while (currentReservedSize > maxReservedSize)
3481 CV_DbgAssert(!reservedEntries_.empty());
3482 const BufferEntry& entry = reservedEntries_.back();
3483 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3484 currentReservedSize -= entry.capacity_;
3485 _releaseBufferEntry(entry);
3486 reservedEntries_.pop_back();
3490 inline size_t _allocationGranularity(size_t size)
3495 else if (size < 64*1024)
3497 else if (size < 1024*1024)
3499 else if (size < 16*1024*1024)
3505 void _allocateBufferEntry(BufferEntry& entry, size_t size)
3507 CV_DbgAssert(entry.clBuffer_ == NULL);
3508 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
3509 Context& ctx = Context::getDefault();
3510 cl_int retval = CL_SUCCESS;
3511 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
3512 CV_Assert(retval == CL_SUCCESS);
3513 CV_Assert(entry.clBuffer_ != NULL);
3514 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
3515 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
3518 void _releaseBufferEntry(const BufferEntry& entry)
3520 CV_Assert(entry.capacity_ != 0);
3521 CV_Assert(entry.clBuffer_ != NULL);
3522 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
3523 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
3524 clReleaseMemObject(entry.clBuffer_);
3527 OpenCLBufferPoolImpl()
3528 : currentReservedSize(0), maxReservedSize(0)
3530 int poolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
3531 maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", poolSize);
3533 virtual ~OpenCLBufferPoolImpl()
3535 freeAllReservedBuffers();
3536 CV_Assert(reservedEntries_.empty());
3539 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
3541 BufferEntry entry = {NULL, 0};
3542 if (maxReservedSize > 0)
3544 AutoLock locker(mutex_);
3545 if (_findAndRemoveEntryFromReservedList(entry, size))
3547 CV_DbgAssert(size <= entry.capacity_);
3548 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
3549 capacity = entry.capacity_;
3550 return entry.clBuffer_;
3553 _allocateBufferEntry(entry, size);
3554 capacity = entry.capacity_;
3555 return entry.clBuffer_;
3557 virtual void release(cl_mem handle, size_t capacity)
3559 BufferEntry entry = {handle, capacity};
3560 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
3562 _releaseBufferEntry(entry);
3566 AutoLock locker(mutex_);
3567 reservedEntries_.push_front(entry);
3568 currentReservedSize += entry.capacity_;
3569 _checkSizeOfReservedEntries();
3573 virtual size_t getReservedSize() const { return currentReservedSize; }
3574 virtual size_t getMaxReservedSize() const { return maxReservedSize; }
3575 virtual void setMaxReservedSize(size_t size)
3577 AutoLock locker(mutex_);
3578 size_t oldMaxReservedSize = maxReservedSize;
3579 maxReservedSize = size;
3580 if (maxReservedSize < oldMaxReservedSize)
3582 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3583 for (; i != reservedEntries_.end();)
3585 const BufferEntry& entry = *i;
3586 if (entry.capacity_ > maxReservedSize / 8)
3588 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3589 currentReservedSize -= entry.capacity_;
3590 _releaseBufferEntry(entry);
3591 i = reservedEntries_.erase(i);
3596 _checkSizeOfReservedEntries();
3599 virtual void freeAllReservedBuffers()
3601 AutoLock locker(mutex_);
3602 std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
3603 for (; i != reservedEntries_.end(); ++i)
3605 const BufferEntry& entry = *i;
3606 _releaseBufferEntry(entry);
3608 reservedEntries_.clear();
3612 #if defined _MSC_VER
3613 #pragma warning(disable:4127) // conditional expression is constant
3615 template <bool readAccess, bool writeAccess>
3616 class AlignedDataPtr
3620 uchar* const originPtr_;
3621 const size_t alignment_;
3623 uchar* allocatedPtr_;
3626 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
3627 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
3629 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
3630 if (((size_t)ptr_ & (alignment - 1)) != 0)
3632 allocatedPtr_ = new uchar[size_ + alignment - 1];
3633 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
3636 memcpy(ptr_, originPtr_, size_);
3641 uchar* getAlignedPtr() const
3643 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
3653 memcpy(originPtr_, ptr_, size_);
3655 delete[] allocatedPtr_;
3656 allocatedPtr_ = NULL;
3661 AlignedDataPtr(const AlignedDataPtr&); // disabled
3662 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
3664 #if defined _MSC_VER
3665 #pragma warning(default:4127) // conditional expression is constant
3668 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
3669 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
3672 class OpenCLAllocator : public MatAllocator
3674 mutable OpenCLBufferPoolImpl bufferPool;
3677 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0
3680 OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
3682 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
3683 int flags, UMatUsageFlags usageFlags) const
3685 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
3689 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
3691 const Device& dev = ctx.device(0);
3693 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
3694 createFlags |= CL_MEM_ALLOC_HOST_PTR;
3696 if( dev.hostUnifiedMemory() )
3699 flags0 = UMatData::COPY_ON_MAP;
3702 UMatData* allocate(int dims, const int* sizes, int type,
3703 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
3706 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3707 CV_Assert(data == 0);
3708 size_t total = CV_ELEM_SIZE(type);
3709 for( int i = dims-1; i >= 0; i-- )
3716 Context& ctx = Context::getDefault();
3717 int createFlags = 0, flags0 = 0;
3718 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
3720 size_t capacity = 0;
3721 void* handle = NULL;
3722 int allocatorFlags = 0;
3723 if (createFlags == 0)
3725 handle = bufferPool.allocate(total, capacity);
3727 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3728 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
3734 handle = clCreateBuffer((cl_context)ctx.ptr(),
3735 CL_MEM_READ_WRITE|createFlags, total, 0, &retval);
3736 if( !handle || retval != CL_SUCCESS )
3737 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3739 UMatData* u = new UMatData(this);
3742 u->capacity = capacity;
3745 u->allocatorFlags_ = allocatorFlags;
3746 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
3750 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
3755 UMatDataAutoLock lock(u);
3759 CV_Assert(u->origdata != 0);
3760 Context& ctx = Context::getDefault();
3761 int createFlags = 0, flags0 = 0;
3762 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
3764 cl_context ctx_handle = (cl_context)ctx.ptr();
3766 int tempUMatFlags = UMatData::TEMP_UMAT;
3767 u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|CL_MEM_READ_WRITE,
3768 u->size, u->origdata, &retval);
3769 if((!u->handle || retval != CL_SUCCESS) && !(accessFlags & ACCESS_FAST))
3771 u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
3772 u->size, u->origdata, &retval);
3773 tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
3776 if(!u->handle || retval != CL_SUCCESS)
3778 u->prevAllocator = u->currAllocator;
3779 u->currAllocator = this;
3780 u->flags |= tempUMatFlags;
3782 if(accessFlags & ACCESS_WRITE)
3783 u->markHostCopyObsolete(true);
3787 /*void sync(UMatData* u) const
3789 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3790 UMatDataAutoLock lock(u);
3792 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
3794 if( u->tempCopiedUMat() )
3796 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3797 u->size, u->origdata, 0, 0, 0);
3802 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3803 (CL_MAP_READ | CL_MAP_WRITE),
3804 0, u->size, 0, 0, 0, &retval);
3805 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
3808 u->markHostCopyObsolete(false);
3810 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
3812 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3813 u->size, u->data, 0, 0, 0);
3817 void deallocate(UMatData* u) const
3822 CV_Assert(u->urefcount >= 0);
3823 CV_Assert(u->refcount >= 0);
3825 // TODO: !!! when we add Shared Virtual Memory Support,
3826 // this function (as well as the others) should be corrected
3827 CV_Assert(u->handle != 0 && u->urefcount == 0);
3830 // UMatDataAutoLock lock(u);
3831 if( u->hostCopyObsolete() && u->refcount > 0 )
3833 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3834 if( u->tempCopiedUMat() )
3836 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3837 CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3838 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
3843 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3844 (CL_MAP_READ | CL_MAP_WRITE),
3845 0, u->size, 0, 0, 0, &retval);
3846 CV_OclDbgAssert(retval == CL_SUCCESS);
3847 CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
3848 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3851 u->markHostCopyObsolete(false);
3852 clReleaseMemObject((cl_mem)u->handle);
3854 u->currAllocator = u->prevAllocator;
3855 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3857 u->data = u->origdata;
3858 if(u->refcount == 0)
3859 u->currAllocator->deallocate(u);
3863 CV_Assert(u->refcount == 0);
3864 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3869 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
3871 bufferPool.release((cl_mem)u->handle, u->capacity);
3875 clReleaseMemObject((cl_mem)u->handle);
3883 void map(UMatData* u, int accessFlags) const
3888 CV_Assert( u->handle != 0 );
3890 UMatDataAutoLock autolock(u);
3892 if(accessFlags & ACCESS_WRITE)
3893 u->markDeviceCopyObsolete(true);
3895 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3897 // FIXIT Workaround for UMat synchronization issue
3898 // if( u->refcount == 0 )
3900 if( !u->copyOnMap() )
3902 if (u->data) // FIXIT Workaround for UMat synchronization issue
3904 //CV_Assert(u->hostCopyObsolete() == false);
3907 // because there can be other map requests for the same UMat with different access flags,
3908 // we use the universal (read-write) access mode.
3910 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3911 (CL_MAP_READ | CL_MAP_WRITE),
3912 0, u->size, 0, 0, 0, &retval);
3913 if(u->data && retval == CL_SUCCESS)
3915 u->markHostCopyObsolete(false);
3916 u->markDeviceMemMapped(true);
3920 // if map failed, switch to copy-on-map mode for the particular buffer
3921 u->flags |= UMatData::COPY_ON_MAP;
3926 u->data = (uchar*)fastMalloc(u->size);
3927 u->markHostCopyObsolete(true);
3931 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
3933 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3934 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3935 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
3936 u->markHostCopyObsolete(false);
3940 void unmap(UMatData* u) const
3946 CV_Assert(u->handle != 0);
3948 UMatDataAutoLock autolock(u);
3950 // FIXIT Workaround for UMat synchronization issue
3954 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3956 if( !u->copyOnMap() && u->deviceMemMapped() )
3958 CV_Assert(u->data != NULL);
3959 u->markDeviceMemMapped(false);
3960 CV_Assert( (retval = clEnqueueUnmapMemObject(q,
3961 (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
3962 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3965 else if( u->copyOnMap() && u->deviceCopyObsolete() )
3967 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3968 CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3969 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
3971 u->markDeviceCopyObsolete(false);
3972 u->markHostCopyObsolete(false);
3975 bool checkContinuous(int dims, const size_t sz[],
3976 const size_t srcofs[], const size_t srcstep[],
3977 const size_t dstofs[], const size_t dststep[],
3978 size_t& total, size_t new_sz[],
3979 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
3980 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
3982 bool iscontinuous = true;
3983 srcrawofs = srcofs ? srcofs[dims-1] : 0;
3984 dstrawofs = dstofs ? dstofs[dims-1] : 0;
3986 for( int i = dims-2; i >= 0; i-- )
3988 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
3989 iscontinuous = false;
3992 srcrawofs += srcofs[i]*srcstep[i];
3994 dstrawofs += dstofs[i]*dststep[i];
3999 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
4002 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
4003 // we assume that new_... arrays are initialized by caller
4004 // with 0's, so there is no else branch
4007 new_srcofs[0] = srcofs[1];
4008 new_srcofs[1] = srcofs[0];
4014 new_dstofs[0] = dstofs[1];
4015 new_dstofs[1] = dstofs[0];
4019 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
4020 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
4024 // we could check for dims == 3 here,
4025 // but from user perspective this one is more informative
4026 CV_Assert(dims <= 3);
4027 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
4030 new_srcofs[0] = srcofs[2];
4031 new_srcofs[1] = srcofs[1];
4032 new_srcofs[2] = srcofs[0];
4037 new_dstofs[0] = dstofs[2];
4038 new_dstofs[1] = dstofs[1];
4039 new_dstofs[2] = dstofs[0];
4042 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
4043 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
4046 return iscontinuous;
4049 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
4050 const size_t srcofs[], const size_t srcstep[],
4051 const size_t dststep[]) const
4055 UMatDataAutoLock autolock(u);
4057 if( u->data && !u->hostCopyObsolete() )
4059 Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
4062 CV_Assert( u->handle != 0 );
4064 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4066 size_t total = 0, new_sz[] = {0, 0, 0};
4067 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4068 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4070 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
4072 srcrawofs, new_srcofs, new_srcstep,
4073 dstrawofs, new_dstofs, new_dststep);
4075 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4078 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
4079 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4083 CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4084 new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
4085 new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4089 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
4090 const size_t dstofs[], const size_t dststep[],
4091 const size_t srcstep[]) const
4096 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4097 CV_Assert(u->refcount == 0 || u->tempUMat());
4099 size_t total = 0, new_sz[] = {0, 0, 0};
4100 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4101 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4103 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
4105 srcrawofs, new_srcofs, new_srcstep,
4106 dstrawofs, new_dstofs, new_dststep);
4108 UMatDataAutoLock autolock(u);
4110 // if there is cached CPU copy of the GPU matrix,
4111 // we could use it as a destination.
4112 // we can do it in 2 cases:
4113 // 1. we overwrite the whole content
4114 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
4115 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
4117 Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
4118 u->markHostCopyObsolete(false);
4119 u->markDeviceCopyObsolete(true);
4123 CV_Assert( u->handle != 0 );
4124 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4126 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4129 CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
4130 CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) == CL_SUCCESS );
4134 CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4135 new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
4136 new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) == CL_SUCCESS );
4139 u->markHostCopyObsolete(true);
4140 u->markDeviceCopyObsolete(false);
4143 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
4144 const size_t srcofs[], const size_t srcstep[],
4145 const size_t dstofs[], const size_t dststep[], bool _sync) const
4150 size_t total = 0, new_sz[] = {0, 0, 0};
4151 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4152 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4154 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
4156 srcrawofs, new_srcofs, new_srcstep,
4157 dstrawofs, new_dstofs, new_dststep);
4159 UMatDataAutoLock src_autolock(src);
4160 UMatDataAutoLock dst_autolock(dst);
4162 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
4164 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
4167 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
4169 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
4170 dst->markHostCopyObsolete(false);
4171 dst->markDeviceCopyObsolete(true);
4175 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4176 CV_Assert(dst->refcount == 0);
4177 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4181 CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4182 srcrawofs, dstrawofs, total, 0, 0, 0) == CL_SUCCESS );
4187 CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4188 new_srcofs, new_dstofs, new_sz,
4189 new_srcstep[0], new_srcstep[1],
4190 new_dststep[0], new_dststep[1],
4191 0, 0, 0)) == CL_SUCCESS );
4194 dst->markHostCopyObsolete(true);
4195 dst->markDeviceCopyObsolete(false);
4199 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
4203 BufferPoolController* getBufferPoolController() const { return &bufferPool; }
4205 MatAllocator* matStdAllocator;
4208 MatAllocator* getOpenCLAllocator()
4210 static MatAllocator * allocator = new OpenCLAllocator();
4214 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
4216 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
4218 cl_uint numDevices = 0;
4219 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4220 0, NULL, &numDevices) == CL_SUCCESS);
4222 if (numDevices == 0)
4228 devices.resize((size_t)numDevices);
4229 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4230 numDevices, &devices[0], &numDevices) == CL_SUCCESS);
4233 struct PlatformInfo::Impl
4238 handle = *(cl_platform_id*)id;
4239 getDevices(devices, handle);
4242 String getStrProp(cl_device_info prop) const
4246 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
4247 sz < sizeof(buf) ? String(buf) : String();
4250 IMPLEMENT_REFCOUNTABLE();
4251 std::vector<cl_device_id> devices;
4252 cl_platform_id handle;
4255 PlatformInfo::PlatformInfo()
4260 PlatformInfo::PlatformInfo(void* platform_id)
4262 p = new Impl(platform_id);
4265 PlatformInfo::~PlatformInfo()
4271 PlatformInfo::PlatformInfo(const PlatformInfo& i)
4278 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
4291 int PlatformInfo::deviceNumber() const
4293 return p ? (int)p->devices.size() : 0;
4296 void PlatformInfo::getDevice(Device& device, int d) const
4298 CV_Assert(p && d < (int)p->devices.size() );
4300 device.set(p->devices[d]);
4303 String PlatformInfo::name() const
4305 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
4308 String PlatformInfo::vendor() const
4310 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
4313 String PlatformInfo::version() const
4315 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
4318 static void getPlatforms(std::vector<cl_platform_id>& platforms)
4320 cl_uint numPlatforms = 0;
4321 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
4323 if (numPlatforms == 0)
4329 platforms.resize((size_t)numPlatforms);
4330 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
4333 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
4335 std::vector<cl_platform_id> platforms;
4336 getPlatforms(platforms);
4338 for (size_t i = 0; i < platforms.size(); i++)
4339 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
4342 const char* typeToStr(int type)
4344 static const char* tab[]=
4346 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4347 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4348 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4349 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4350 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4351 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
4352 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
4353 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4355 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4356 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4359 const char* memopTypeToStr(int type)
4361 static const char* tab[] =
4363 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4364 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4365 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4366 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4367 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4368 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4369 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
4370 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4372 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4373 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4376 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
4378 if( sdepth == ddepth )
4380 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
4381 if( ddepth >= CV_32F ||
4382 (ddepth == CV_32S && sdepth < CV_32S) ||
4383 (ddepth == CV_16S && sdepth <= CV_8S) ||
4384 (ddepth == CV_16U && sdepth == CV_8U))
4386 sprintf(buf, "convert_%s", typestr);
4388 else if( sdepth >= CV_32F )
4389 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
4391 sprintf(buf, "convert_%s_sat", typestr);
4396 template <typename T>
4397 static std::string kerToStr(const Mat & k)
4399 int width = k.cols - 1, depth = k.depth();
4400 const T * const data = k.ptr<T>();
4402 std::ostringstream stream;
4403 stream.precision(10);
4407 for (int i = 0; i < width; ++i)
4408 stream << "DIG(" << (int)data[i] << ")";
4409 stream << "DIG(" << (int)data[width] << ")";
4411 else if (depth == CV_32F)
4413 stream.setf(std::ios_base::showpoint);
4414 for (int i = 0; i < width; ++i)
4415 stream << "DIG(" << data[i] << "f)";
4416 stream << "DIG(" << data[width] << "f)";
4420 for (int i = 0; i < width; ++i)
4421 stream << "DIG(" << data[i] << ")";
4422 stream << "DIG(" << data[width] << ")";
4425 return stream.str();
4428 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
4430 Mat kernel = _kernel.getMat().reshape(1, 1);
4432 int depth = kernel.depth();
4436 if (ddepth != depth)
4437 kernel.convertTo(kernel, ddepth);
4439 typedef std::string (* func_t)(const Mat &);
4440 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
4441 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
4442 const func_t func = funcs[ddepth];
4443 CV_Assert(func != 0);
4445 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
4448 #define PROCESS_SRC(src) \
4453 CV_Assert(src.isMat() || src.isUMat()); \
4454 int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
4455 Size csize = src.size(); \
4456 cols.push_back(ccn * csize.width); \
4457 if (ctype != type) \
4459 offsets.push_back(src.offset()); \
4460 steps.push_back(src.step()); \
4465 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
4466 InputArray src4, InputArray src5, InputArray src6,
4467 InputArray src7, InputArray src8, InputArray src9)
4469 int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz1 = CV_ELEM_SIZE1(depth);
4470 Size ssize = src1.size();
4471 const ocl::Device & d = ocl::Device::getDefault();
4473 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
4474 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
4475 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
4476 d.preferredVectorWidthDouble(), -1 }, kercn = vectorWidths[depth];
4478 // if the device says don't use vectors
4479 if (vectorWidths[0] == 1)
4482 int vectorWidthsOthers[] = { 16, 16, 8, 8, 1, 1, 1, -1 };
4483 kercn = vectorWidthsOthers[depth];
4486 if (ssize.width * cn < kercn || kercn <= 0)
4489 std::vector<size_t> offsets, steps, cols;
4500 size_t size = offsets.size();
4501 int wsz = kercn * esz1;
4502 std::vector<int> dividers(size, wsz);
4504 for (size_t i = 0; i < size; ++i)
4505 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
4509 for (size_t i = 0; i < size; ++i)
4510 if (dividers[i] != wsz)
4517 // width = *std::min_element(dividers.begin(), dividers.end());
4525 // TODO Make this as a method of OpenCL "BuildOptions" class
4526 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
4528 if (!buildOptions.empty())
4529 buildOptions += " ";
4530 int type = _m.type(), depth = CV_MAT_DEPTH(type);
4531 buildOptions += format(
4532 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
4533 name.c_str(), ocl::typeToStr(type),
4534 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
4535 name.c_str(), (int)CV_MAT_CN(type),
4536 name.c_str(), (int)CV_ELEM_SIZE(type),
4537 name.c_str(), (int)CV_ELEM_SIZE1(type),
4538 name.c_str(), (int)depth
4543 struct Image2D::Impl
4545 Impl(const UMat &src, bool norm, bool alias)
4549 init(src, norm, alias);
4555 clReleaseMemObject(handle);
4558 static cl_image_format getImageFormat(int depth, int cn, bool norm)
4560 cl_image_format format;
4561 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
4562 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
4563 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
4564 CL_SNORM_INT16, -1, -1, -1, -1 };
4565 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
4567 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
4568 int channelOrder = channelOrders[cn];
4569 format.image_channel_data_type = (cl_channel_type)channelType;
4570 format.image_channel_order = (cl_channel_order)channelOrder;
4574 static bool isFormatSupported(cl_image_format format)
4576 cl_context context = (cl_context)Context::getDefault().ptr();
4577 // Figure out how many formats are supported by this context.
4578 cl_uint numFormats = 0;
4579 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4580 CL_MEM_OBJECT_IMAGE2D, numFormats,
4582 AutoBuffer<cl_image_format> formats(numFormats);
4583 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4584 CL_MEM_OBJECT_IMAGE2D, numFormats,
4586 CV_OclDbgAssert(err == CL_SUCCESS);
4587 for (cl_uint i = 0; i < numFormats; ++i)
4589 if (!memcmp(&formats[i], &format, sizeof(format)))
4597 void init(const UMat &src, bool norm, bool alias)
4599 CV_Assert(ocl::Device::getDefault().imageSupport());
4601 int err, depth = src.depth(), cn = src.channels();
4603 cl_image_format format = getImageFormat(depth, cn, norm);
4605 if (!isFormatSupported(format))
4606 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
4608 cl_context context = (cl_context)Context::getDefault().ptr();
4609 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
4611 #ifdef CL_VERSION_1_2
4612 // this enables backwards portability to
4613 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
4614 const Device & d = ocl::Device::getDefault();
4615 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
4616 CV_Assert(!alias || canCreateAlias(src));
4617 if (1 < major || (1 == major && 2 <= minor))
4620 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
4621 desc.image_width = src.cols;
4622 desc.image_height = src.rows;
4623 desc.image_depth = 0;
4624 desc.image_array_size = 1;
4625 desc.image_row_pitch = alias ? src.step[0] : 0;
4626 desc.image_slice_pitch = 0;
4627 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
4628 desc.num_mip_levels = 0;
4629 desc.num_samples = 0;
4630 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
4635 CV_SUPPRESS_DEPRECATED_START
4636 CV_Assert(!alias); // This is an OpenCL 1.2 extension
4637 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
4638 CV_SUPPRESS_DEPRECATED_END
4640 CV_OclDbgAssert(err == CL_SUCCESS);
4642 size_t origin[] = { 0, 0, 0 };
4643 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
4646 if (!alias && !src.isContinuous())
4648 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
4649 CV_OclDbgAssert(err == CL_SUCCESS);
4651 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
4652 CV_Assert(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
4653 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL) == CL_SUCCESS);
4654 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4658 devData = (cl_mem)src.handle(ACCESS_READ);
4660 CV_Assert(devData != NULL);
4664 CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS);
4665 if (!src.isContinuous())
4667 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4668 CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS);
4673 IMPLEMENT_REFCOUNTABLE();
4683 Image2D::Image2D(const UMat &src, bool norm, bool alias)
4685 p = new Impl(src, norm, alias);
4688 bool Image2D::canCreateAlias(const UMat &m)
4691 const Device & d = ocl::Device::getDefault();
4692 if (d.imageFromBufferSupport())
4694 // This is the required pitch alignment in pixels
4695 uint pitchAlign = d.imagePitchAlignment();
4696 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
4698 // We don't currently handle the case where the buffer was created
4699 // with CL_MEM_USE_HOST_PTR
4700 if (!m.u->tempUMat())
4709 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
4711 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
4713 return Impl::isFormatSupported(format);
4716 Image2D::Image2D(const Image2D & i)
4723 Image2D & Image2D::operator = (const Image2D & i)
4742 void* Image2D::ptr() const
4744 return p ? p->handle : 0;
4747 bool isPerformanceCheckBypassed()
4749 static bool initialized = false;
4750 static bool value = false;
4753 value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);