1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
47 #include <iostream> // std::cerr
49 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
50 #define CV_OPENCL_SHOW_RUN_ERRORS 0
52 #include "opencv2/core/bufferpool.hpp"
53 #ifndef LOG_BUFFER_POOL
55 # define LOG_BUFFER_POOL printf
57 # define LOG_BUFFER_POOL(...)
62 // TODO Move to some common place
63 static bool getBoolParameter(const char* name, bool defaultValue)
65 const char* envValue = getenv(name);
70 cv::String value = envValue;
71 if (value == "1" || value == "True" || value == "true" || value == "TRUE")
75 if (value == "0" || value == "False" || value == "false" || value == "FALSE")
79 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
83 // TODO Move to some common place
84 static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
87 const char* envValue = NULL;
89 const char* envValue = getenv(name);
95 cv::String value = envValue;
97 for (; pos < value.size(); pos++)
99 if (!isdigit(value[pos]))
102 cv::String valueStr = value.substr(0, pos);
103 cv::String suffixStr = value.substr(pos, value.length() - pos);
104 int v = atoi(valueStr.c_str());
105 if (suffixStr.length() == 0)
107 else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
108 return v * 1024 * 1024;
109 else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
111 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
114 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
115 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
120 // TODO FIXIT: This file can't be build without OPENCL
123 Part of the file is an extract from the standard OpenCL headers from Khronos site.
124 Below is the original copyright.
127 /*******************************************************************************
128 * Copyright (c) 2008 - 2012 The Khronos Group Inc.
130 * Permission is hereby granted, free of charge, to any person obtaining a
131 * copy of this software and/or associated documentation files (the
132 * "Materials"), to deal in the Materials without restriction, including
133 * without limitation the rights to use, copy, modify, merge, publish,
134 * distribute, sublicense, and/or sell copies of the Materials, and to
135 * permit persons to whom the Materials are furnished to do so, subject to
136 * the following conditions:
138 * The above copyright notice and this permission notice shall be included
139 * in all copies or substantial portions of the Materials.
141 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
142 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
143 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
144 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
145 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
146 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
147 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
148 ******************************************************************************/
150 #if 0 //defined __APPLE__
151 #define HAVE_OPENCL 1
156 #define OPENCV_CL_NOT_IMPLEMENTED -1000
160 #if defined __APPLE__
161 #include <OpenCL/opencl.h>
163 #include <CL/opencl.h>
166 static const bool g_haveOpenCL = true;
172 struct _cl_platform_id { int dummy; };
173 struct _cl_device_id { int dummy; };
174 struct _cl_context { int dummy; };
175 struct _cl_command_queue { int dummy; };
176 struct _cl_mem { int dummy; };
177 struct _cl_program { int dummy; };
178 struct _cl_kernel { int dummy; };
179 struct _cl_event { int dummy; };
180 struct _cl_sampler { int dummy; };
182 typedef struct _cl_platform_id * cl_platform_id;
183 typedef struct _cl_device_id * cl_device_id;
184 typedef struct _cl_context * cl_context;
185 typedef struct _cl_command_queue * cl_command_queue;
186 typedef struct _cl_mem * cl_mem;
187 typedef struct _cl_program * cl_program;
188 typedef struct _cl_kernel * cl_kernel;
189 typedef struct _cl_event * cl_event;
190 typedef struct _cl_sampler * cl_sampler;
193 typedef unsigned cl_uint;
194 #if defined (_WIN32) && defined(_MSC_VER)
195 typedef __int64 cl_long;
196 typedef unsigned __int64 cl_ulong;
198 typedef long cl_long;
199 typedef unsigned long cl_ulong;
202 typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
203 typedef cl_ulong cl_bitfield;
204 typedef cl_bitfield cl_device_type;
205 typedef cl_uint cl_platform_info;
206 typedef cl_uint cl_device_info;
207 typedef cl_bitfield cl_device_fp_config;
208 typedef cl_uint cl_device_mem_cache_type;
209 typedef cl_uint cl_device_local_mem_type;
210 typedef cl_bitfield cl_device_exec_capabilities;
211 typedef cl_bitfield cl_command_queue_properties;
212 typedef intptr_t cl_device_partition_property;
213 typedef cl_bitfield cl_device_affinity_domain;
215 typedef intptr_t cl_context_properties;
216 typedef cl_uint cl_context_info;
217 typedef cl_uint cl_command_queue_info;
218 typedef cl_uint cl_channel_order;
219 typedef cl_uint cl_channel_type;
220 typedef cl_bitfield cl_mem_flags;
221 typedef cl_uint cl_mem_object_type;
222 typedef cl_uint cl_mem_info;
223 typedef cl_bitfield cl_mem_migration_flags;
224 typedef cl_uint cl_image_info;
225 typedef cl_uint cl_buffer_create_type;
226 typedef cl_uint cl_addressing_mode;
227 typedef cl_uint cl_filter_mode;
228 typedef cl_uint cl_sampler_info;
229 typedef cl_bitfield cl_map_flags;
230 typedef cl_uint cl_program_info;
231 typedef cl_uint cl_program_build_info;
232 typedef cl_uint cl_program_binary_type;
233 typedef cl_int cl_build_status;
234 typedef cl_uint cl_kernel_info;
235 typedef cl_uint cl_kernel_arg_info;
236 typedef cl_uint cl_kernel_arg_address_qualifier;
237 typedef cl_uint cl_kernel_arg_access_qualifier;
238 typedef cl_bitfield cl_kernel_arg_type_qualifier;
239 typedef cl_uint cl_kernel_work_group_info;
240 typedef cl_uint cl_event_info;
241 typedef cl_uint cl_command_type;
242 typedef cl_uint cl_profiling_info;
245 typedef struct _cl_image_format {
246 cl_channel_order image_channel_order;
247 cl_channel_type image_channel_data_type;
250 typedef struct _cl_image_desc {
251 cl_mem_object_type image_type;
255 size_t image_array_size;
256 size_t image_row_pitch;
257 size_t image_slice_pitch;
258 cl_uint num_mip_levels;
263 typedef struct _cl_buffer_region {
269 //////////////////////////////////////////////////////////
272 #define CL_DEVICE_NOT_FOUND -1
273 #define CL_DEVICE_NOT_AVAILABLE -2
274 #define CL_COMPILER_NOT_AVAILABLE -3
275 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
276 #define CL_OUT_OF_RESOURCES -5
277 #define CL_OUT_OF_HOST_MEMORY -6
278 #define CL_PROFILING_INFO_NOT_AVAILABLE -7
279 #define CL_MEM_COPY_OVERLAP -8
280 #define CL_IMAGE_FORMAT_MISMATCH -9
281 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
282 #define CL_BUILD_PROGRAM_FAILURE -11
283 #define CL_MAP_FAILURE -12
284 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
285 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
286 #define CL_COMPILE_PROGRAM_FAILURE -15
287 #define CL_LINKER_NOT_AVAILABLE -16
288 #define CL_LINK_PROGRAM_FAILURE -17
289 #define CL_DEVICE_PARTITION_FAILED -18
290 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
292 #define CL_INVALID_VALUE -30
293 #define CL_INVALID_DEVICE_TYPE -31
294 #define CL_INVALID_PLATFORM -32
295 #define CL_INVALID_DEVICE -33
296 #define CL_INVALID_CONTEXT -34
297 #define CL_INVALID_QUEUE_PROPERTIES -35
298 #define CL_INVALID_COMMAND_QUEUE -36
299 #define CL_INVALID_HOST_PTR -37
300 #define CL_INVALID_MEM_OBJECT -38
301 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
302 #define CL_INVALID_IMAGE_SIZE -40
303 #define CL_INVALID_SAMPLER -41
304 #define CL_INVALID_BINARY -42
305 #define CL_INVALID_BUILD_OPTIONS -43
306 #define CL_INVALID_PROGRAM -44
307 #define CL_INVALID_PROGRAM_EXECUTABLE -45
308 #define CL_INVALID_KERNEL_NAME -46
309 #define CL_INVALID_KERNEL_DEFINITION -47
310 #define CL_INVALID_KERNEL -48
311 #define CL_INVALID_ARG_INDEX -49
312 #define CL_INVALID_ARG_VALUE -50
313 #define CL_INVALID_ARG_SIZE -51
314 #define CL_INVALID_KERNEL_ARGS -52
315 #define CL_INVALID_WORK_DIMENSION -53
316 #define CL_INVALID_WORK_GROUP_SIZE -54
317 #define CL_INVALID_WORK_ITEM_SIZE -55
318 #define CL_INVALID_GLOBAL_OFFSET -56
319 #define CL_INVALID_EVENT_WAIT_LIST -57
320 #define CL_INVALID_EVENT -58
321 #define CL_INVALID_OPERATION -59
322 #define CL_INVALID_GL_OBJECT -60
323 #define CL_INVALID_BUFFER_SIZE -61
324 #define CL_INVALID_MIP_LEVEL -62
325 #define CL_INVALID_GLOBAL_WORK_SIZE -63
326 #define CL_INVALID_PROPERTY -64
327 #define CL_INVALID_IMAGE_DESCRIPTOR -65
328 #define CL_INVALID_COMPILER_OPTIONS -66
329 #define CL_INVALID_LINKER_OPTIONS -67
330 #define CL_INVALID_DEVICE_PARTITION_COUNT -68
332 /*#define CL_VERSION_1_0 1
333 #define CL_VERSION_1_1 1
334 #define CL_VERSION_1_2 1*/
338 #define CL_BLOCKING CL_TRUE
339 #define CL_NON_BLOCKING CL_FALSE
341 #define CL_PLATFORM_PROFILE 0x0900
342 #define CL_PLATFORM_VERSION 0x0901
343 #define CL_PLATFORM_NAME 0x0902
344 #define CL_PLATFORM_VENDOR 0x0903
345 #define CL_PLATFORM_EXTENSIONS 0x0904
347 #define CL_DEVICE_TYPE_DEFAULT (1 << 0)
348 #define CL_DEVICE_TYPE_CPU (1 << 1)
349 #define CL_DEVICE_TYPE_GPU (1 << 2)
350 #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
351 #define CL_DEVICE_TYPE_CUSTOM (1 << 4)
352 #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
353 #define CL_DEVICE_TYPE 0x1000
354 #define CL_DEVICE_VENDOR_ID 0x1001
355 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
356 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
357 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
358 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
359 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
360 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
361 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
362 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
363 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
364 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
365 #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
366 #define CL_DEVICE_ADDRESS_BITS 0x100D
367 #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
368 #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
369 #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
370 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
371 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
372 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
373 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
374 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
375 #define CL_DEVICE_IMAGE_SUPPORT 0x1016
376 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
377 #define CL_DEVICE_MAX_SAMPLERS 0x1018
378 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
379 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
380 #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
381 #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
382 #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
383 #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
384 #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
385 #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
386 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
387 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
388 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
389 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
390 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
391 #define CL_DEVICE_ENDIAN_LITTLE 0x1026
392 #define CL_DEVICE_AVAILABLE 0x1027
393 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028
394 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
395 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A
396 #define CL_DEVICE_NAME 0x102B
397 #define CL_DEVICE_VENDOR 0x102C
398 #define CL_DRIVER_VERSION 0x102D
399 #define CL_DEVICE_PROFILE 0x102E
400 #define CL_DEVICE_VERSION 0x102F
401 #define CL_DEVICE_EXTENSIONS 0x1030
402 #define CL_DEVICE_PLATFORM 0x1031
403 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
404 #define CL_DEVICE_HALF_FP_CONFIG 0x1033
405 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
406 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
407 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
408 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
409 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
410 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
411 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
412 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
413 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
414 #define CL_DEVICE_OPENCL_C_VERSION 0x103D
415 #define CL_DEVICE_LINKER_AVAILABLE 0x103E
416 #define CL_DEVICE_BUILT_IN_KERNELS 0x103F
417 #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
418 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
419 #define CL_DEVICE_PARENT_DEVICE 0x1042
420 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
421 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044
422 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
423 #define CL_DEVICE_PARTITION_TYPE 0x1046
424 #define CL_DEVICE_REFERENCE_COUNT 0x1047
425 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
426 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
427 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
428 #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
430 #define CL_FP_DENORM (1 << 0)
431 #define CL_FP_INF_NAN (1 << 1)
432 #define CL_FP_ROUND_TO_NEAREST (1 << 2)
433 #define CL_FP_ROUND_TO_ZERO (1 << 3)
434 #define CL_FP_ROUND_TO_INF (1 << 4)
435 #define CL_FP_FMA (1 << 5)
436 #define CL_FP_SOFT_FLOAT (1 << 6)
437 #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
440 #define CL_READ_ONLY_CACHE 0x1
441 #define CL_READ_WRITE_CACHE 0x2
443 #define CL_GLOBAL 0x2
444 #define CL_EXEC_KERNEL (1 << 0)
445 #define CL_EXEC_NATIVE_KERNEL (1 << 1)
446 #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
447 #define CL_QUEUE_PROFILING_ENABLE (1 << 1)
449 #define CL_CONTEXT_REFERENCE_COUNT 0x1080
450 #define CL_CONTEXT_DEVICES 0x1081
451 #define CL_CONTEXT_PROPERTIES 0x1082
452 #define CL_CONTEXT_NUM_DEVICES 0x1083
453 #define CL_CONTEXT_PLATFORM 0x1084
454 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
456 #define CL_DEVICE_PARTITION_EQUALLY 0x1086
457 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
458 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
459 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
460 #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
461 #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
462 #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
463 #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
464 #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
465 #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
466 #define CL_QUEUE_CONTEXT 0x1090
467 #define CL_QUEUE_DEVICE 0x1091
468 #define CL_QUEUE_REFERENCE_COUNT 0x1092
469 #define CL_QUEUE_PROPERTIES 0x1093
470 #define CL_MEM_READ_WRITE (1 << 0)
471 #define CL_MEM_WRITE_ONLY (1 << 1)
472 #define CL_MEM_READ_ONLY (1 << 2)
473 #define CL_MEM_USE_HOST_PTR (1 << 3)
474 #define CL_MEM_ALLOC_HOST_PTR (1 << 4)
475 #define CL_MEM_COPY_HOST_PTR (1 << 5)
477 #define CL_MEM_HOST_WRITE_ONLY (1 << 7)
478 #define CL_MEM_HOST_READ_ONLY (1 << 8)
479 #define CL_MEM_HOST_NO_ACCESS (1 << 9)
480 #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
481 #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
487 #define CL_RGB 0x10B4
488 #define CL_RGBA 0x10B5
489 #define CL_BGRA 0x10B6
490 #define CL_ARGB 0x10B7
491 #define CL_INTENSITY 0x10B8
492 #define CL_LUMINANCE 0x10B9
494 #define CL_RGx 0x10BB
495 #define CL_RGBx 0x10BC
496 #define CL_DEPTH 0x10BD
497 #define CL_DEPTH_STENCIL 0x10BE
499 #define CL_SNORM_INT8 0x10D0
500 #define CL_SNORM_INT16 0x10D1
501 #define CL_UNORM_INT8 0x10D2
502 #define CL_UNORM_INT16 0x10D3
503 #define CL_UNORM_SHORT_565 0x10D4
504 #define CL_UNORM_SHORT_555 0x10D5
505 #define CL_UNORM_INT_101010 0x10D6
506 #define CL_SIGNED_INT8 0x10D7
507 #define CL_SIGNED_INT16 0x10D8
508 #define CL_SIGNED_INT32 0x10D9
509 #define CL_UNSIGNED_INT8 0x10DA
510 #define CL_UNSIGNED_INT16 0x10DB
511 #define CL_UNSIGNED_INT32 0x10DC
512 #define CL_HALF_FLOAT 0x10DD
513 #define CL_FLOAT 0x10DE
514 #define CL_UNORM_INT24 0x10DF
516 #define CL_MEM_OBJECT_BUFFER 0x10F0
517 #define CL_MEM_OBJECT_IMAGE2D 0x10F1
518 #define CL_MEM_OBJECT_IMAGE3D 0x10F2
519 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
520 #define CL_MEM_OBJECT_IMAGE1D 0x10F4
521 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
522 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
524 #define CL_MEM_TYPE 0x1100
525 #define CL_MEM_FLAGS 0x1101
526 #define CL_MEM_SIZE 0x1102
527 #define CL_MEM_HOST_PTR 0x1103
528 #define CL_MEM_MAP_COUNT 0x1104
529 #define CL_MEM_REFERENCE_COUNT 0x1105
530 #define CL_MEM_CONTEXT 0x1106
531 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
532 #define CL_MEM_OFFSET 0x1108
534 #define CL_IMAGE_FORMAT 0x1110
535 #define CL_IMAGE_ELEMENT_SIZE 0x1111
536 #define CL_IMAGE_ROW_PITCH 0x1112
537 #define CL_IMAGE_SLICE_PITCH 0x1113
538 #define CL_IMAGE_WIDTH 0x1114
539 #define CL_IMAGE_HEIGHT 0x1115
540 #define CL_IMAGE_DEPTH 0x1116
541 #define CL_IMAGE_ARRAY_SIZE 0x1117
542 #define CL_IMAGE_BUFFER 0x1118
543 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119
544 #define CL_IMAGE_NUM_SAMPLES 0x111A
546 #define CL_ADDRESS_NONE 0x1130
547 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
548 #define CL_ADDRESS_CLAMP 0x1132
549 #define CL_ADDRESS_REPEAT 0x1133
550 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134
552 #define CL_FILTER_NEAREST 0x1140
553 #define CL_FILTER_LINEAR 0x1141
555 #define CL_SAMPLER_REFERENCE_COUNT 0x1150
556 #define CL_SAMPLER_CONTEXT 0x1151
557 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152
558 #define CL_SAMPLER_ADDRESSING_MODE 0x1153
559 #define CL_SAMPLER_FILTER_MODE 0x1154
561 #define CL_MAP_READ (1 << 0)
562 #define CL_MAP_WRITE (1 << 1)
563 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
565 #define CL_PROGRAM_REFERENCE_COUNT 0x1160
566 #define CL_PROGRAM_CONTEXT 0x1161
567 #define CL_PROGRAM_NUM_DEVICES 0x1162
568 #define CL_PROGRAM_DEVICES 0x1163
569 #define CL_PROGRAM_SOURCE 0x1164
570 #define CL_PROGRAM_BINARY_SIZES 0x1165
571 #define CL_PROGRAM_BINARIES 0x1166
572 #define CL_PROGRAM_NUM_KERNELS 0x1167
573 #define CL_PROGRAM_KERNEL_NAMES 0x1168
574 #define CL_PROGRAM_BUILD_STATUS 0x1181
575 #define CL_PROGRAM_BUILD_OPTIONS 0x1182
576 #define CL_PROGRAM_BUILD_LOG 0x1183
577 #define CL_PROGRAM_BINARY_TYPE 0x1184
578 #define CL_PROGRAM_BINARY_TYPE_NONE 0x0
579 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
580 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
581 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
583 #define CL_BUILD_SUCCESS 0
584 #define CL_BUILD_NONE -1
585 #define CL_BUILD_ERROR -2
586 #define CL_BUILD_IN_PROGRESS -3
588 #define CL_KERNEL_FUNCTION_NAME 0x1190
589 #define CL_KERNEL_NUM_ARGS 0x1191
590 #define CL_KERNEL_REFERENCE_COUNT 0x1192
591 #define CL_KERNEL_CONTEXT 0x1193
592 #define CL_KERNEL_PROGRAM 0x1194
593 #define CL_KERNEL_ATTRIBUTES 0x1195
594 #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
595 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
596 #define CL_KERNEL_ARG_TYPE_NAME 0x1198
597 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
598 #define CL_KERNEL_ARG_NAME 0x119A
599 #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
600 #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
601 #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
602 #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
603 #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
604 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
605 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
606 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
607 #define CL_KERNEL_ARG_TYPE_NONE 0
608 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
609 #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
610 #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
611 #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
612 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
613 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
614 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
615 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
616 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
618 #define CL_EVENT_COMMAND_QUEUE 0x11D0
619 #define CL_EVENT_COMMAND_TYPE 0x11D1
620 #define CL_EVENT_REFERENCE_COUNT 0x11D2
621 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
622 #define CL_EVENT_CONTEXT 0x11D4
624 #define CL_COMMAND_NDRANGE_KERNEL 0x11F0
625 #define CL_COMMAND_TASK 0x11F1
626 #define CL_COMMAND_NATIVE_KERNEL 0x11F2
627 #define CL_COMMAND_READ_BUFFER 0x11F3
628 #define CL_COMMAND_WRITE_BUFFER 0x11F4
629 #define CL_COMMAND_COPY_BUFFER 0x11F5
630 #define CL_COMMAND_READ_IMAGE 0x11F6
631 #define CL_COMMAND_WRITE_IMAGE 0x11F7
632 #define CL_COMMAND_COPY_IMAGE 0x11F8
633 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
634 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
635 #define CL_COMMAND_MAP_BUFFER 0x11FB
636 #define CL_COMMAND_MAP_IMAGE 0x11FC
637 #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
638 #define CL_COMMAND_MARKER 0x11FE
639 #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
640 #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
641 #define CL_COMMAND_READ_BUFFER_RECT 0x1201
642 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
643 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203
644 #define CL_COMMAND_USER 0x1204
645 #define CL_COMMAND_BARRIER 0x1205
646 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
647 #define CL_COMMAND_FILL_BUFFER 0x1207
648 #define CL_COMMAND_FILL_IMAGE 0x1208
650 #define CL_COMPLETE 0x0
651 #define CL_RUNNING 0x1
652 #define CL_SUBMITTED 0x2
653 #define CL_QUEUED 0x3
654 #define CL_BUFFER_CREATE_TYPE_REGION 0x1220
656 #define CL_PROFILING_COMMAND_QUEUED 0x1280
657 #define CL_PROFILING_COMMAND_SUBMIT 0x1281
658 #define CL_PROFILING_COMMAND_START 0x1282
659 #define CL_PROFILING_COMMAND_END 0x1283
661 #define CL_CALLBACK CV_STDCALL
663 static volatile bool g_haveOpenCL = false;
664 static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
666 #if defined(__APPLE__)
669 static void* initOpenCLAndLoad(const char* funcname)
671 static bool initialized = false;
672 static void* handle = 0;
677 const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
678 oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
679 "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
680 handle = dlopen(oclpath, RTLD_LAZY);
682 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
684 fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath);
686 fprintf(stderr, "Failed to load OpenCL runtime\n");
692 return funcname && handle ? dlsym(handle, funcname) : 0;
695 #elif defined WIN32 || defined _WIN32
697 #ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
698 #define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
701 #if (_WIN32_WINNT >= 0x0602)
702 #include <synchapi.h>
709 static void* initOpenCLAndLoad(const char* funcname)
711 static bool initialized = false;
712 static HMODULE handle = 0;
718 handle = LoadLibraryA("OpenCL.dll");
720 g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
727 return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
730 #elif defined(__linux)
735 static void* initOpenCLAndLoad(const char* funcname)
737 static bool initialized = false;
738 static void* handle = 0;
743 handle = dlopen("libOpenCL.so", RTLD_LAZY);
745 handle = dlopen("libCL.so", RTLD_LAZY);
747 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
753 return funcname ? (void*)dlsym(handle, funcname) : 0;
758 static void* initOpenCLAndLoad(const char*)
766 #define OCL_FUNC(rettype, funcname, argsdecl, args) \
767 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
768 static rettype funcname argsdecl \
770 static funcname##_t funcname##_p = 0; \
771 if( !funcname##_p ) \
773 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
774 if( !funcname##_p ) \
775 return OPENCV_CL_NOT_IMPLEMENTED; \
777 return funcname##_p args; \
781 #define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
782 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
783 static rettype funcname argsdecl \
785 static funcname##_t funcname##_p = 0; \
786 if( !funcname##_p ) \
788 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
789 if( !funcname##_p ) \
792 *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
796 return funcname##_p args; \
799 OCL_FUNC(cl_int, clGetPlatformIDs,
800 (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
801 (num_entries, platforms, num_platforms))
803 OCL_FUNC(cl_int, clGetPlatformInfo,
804 (cl_platform_id platform, cl_platform_info param_name,
805 size_t param_value_size, void * param_value,
806 size_t * param_value_size_ret),
807 (platform, param_name, param_value_size, param_value, param_value_size_ret))
809 OCL_FUNC(cl_int, clGetDeviceInfo,
810 (cl_device_id device,
811 cl_device_info param_name,
812 size_t param_value_size,
814 size_t * param_value_size_ret),
815 (device, param_name, param_value_size, param_value, param_value_size_ret))
818 OCL_FUNC(cl_int, clGetDeviceIDs,
819 (cl_platform_id platform,
820 cl_device_type device_type,
822 cl_device_id * devices,
823 cl_uint * num_devices),
824 (platform, device_type, num_entries, devices, num_devices))
826 OCL_FUNC_P(cl_context, clCreateContext,
827 (const cl_context_properties * properties,
829 const cl_device_id * devices,
830 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
832 cl_int * errcode_ret),
833 (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
835 OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
838 OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
840 OCL_FUNC_P(cl_context, clCreateContextFromType,
841 (const cl_context_properties * properties,
842 cl_device_type device_type,
843 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
845 cl_int * errcode_ret),
846 (properties, device_type, pfn_notify, user_data, errcode_ret))
848 OCL_FUNC(cl_int, clGetContextInfo,
850 cl_context_info param_name,
851 size_t param_value_size,
853 size_t * param_value_size_ret),
854 (context, param_name, param_value_size,
855 param_value, param_value_size_ret))
857 OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
860 cl_command_queue_properties properties,
861 cl_int * errcode_ret),
862 (context, device, properties, errcode_ret))
864 OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
866 OCL_FUNC_P(cl_mem, clCreateBuffer,
871 cl_int * errcode_ret),
872 (context, flags, size, host_ptr, errcode_ret))
875 OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
877 OCL_FUNC(cl_int, clGetCommandQueueInfo,
878 (cl_command_queue command_queue,
879 cl_command_queue_info param_name,
880 size_t param_value_size,
882 size_t * param_value_size_ret),
883 (command_queue, param_name, param_value_size, param_value, param_value_size_ret))
885 OCL_FUNC_P(cl_mem, clCreateSubBuffer,
888 cl_buffer_create_type buffer_create_type,
889 const void * buffer_create_info,
890 cl_int * errcode_ret),
891 (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
894 OCL_FUNC_P(cl_mem, clCreateImage,
897 const cl_image_format * image_format,
898 const cl_image_desc * image_desc,
900 cl_int * errcode_ret),
901 (context, flags, image_format, image_desc, host_ptr, errcode_ret))
903 OCL_FUNC_P(cl_mem, clCreateImage2D,
906 const cl_image_format * image_format,
909 size_t image_row_pitch,
911 cl_int *errcode_ret),
912 (context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret))
914 OCL_FUNC(cl_int, clGetSupportedImageFormats,
917 cl_mem_object_type image_type,
919 cl_image_format * image_formats,
920 cl_uint * num_image_formats),
921 (context, flags, image_type, num_entries, image_formats, num_image_formats))
924 OCL_FUNC(cl_int, clGetMemObjectInfo,
926 cl_mem_info param_name,
927 size_t param_value_size,
929 size_t * param_value_size_ret),
930 (memobj, param_name, param_value_size, param_value, param_value_size_ret))
932 OCL_FUNC(cl_int, clGetImageInfo,
934 cl_image_info param_name,
935 size_t param_value_size,
937 size_t * param_value_size_ret),
938 (image, param_name, param_value_size, param_value, param_value_size_ret))
940 OCL_FUNC(cl_int, clCreateKernelsInProgram,
944 cl_uint * num_kernels_ret),
945 (program, num_kernels, kernels, num_kernels_ret))
947 OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
949 OCL_FUNC(cl_int, clGetKernelArgInfo,
952 cl_kernel_arg_info param_name,
953 size_t param_value_size,
955 size_t * param_value_size_ret),
956 (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
958 OCL_FUNC(cl_int, clEnqueueReadImage,
959 (cl_command_queue command_queue,
961 cl_bool blocking_read,
962 const size_t * origin[3],
963 const size_t * region[3],
967 cl_uint num_events_in_wait_list,
968 const cl_event * event_wait_list,
970 (command_queue, image, blocking_read, origin, region,
971 row_pitch, slice_pitch,
973 num_events_in_wait_list,
977 OCL_FUNC(cl_int, clEnqueueWriteImage,
978 (cl_command_queue command_queue,
980 cl_bool blocking_write,
981 const size_t * origin[3],
982 const size_t * region[3],
983 size_t input_row_pitch,
984 size_t input_slice_pitch,
986 cl_uint num_events_in_wait_list,
987 const cl_event * event_wait_list,
989 (command_queue, image, blocking_write, origin, region, input_row_pitch,
990 input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
992 OCL_FUNC(cl_int, clEnqueueFillImage,
993 (cl_command_queue command_queue,
995 const void * fill_color,
996 const size_t * origin[3],
997 const size_t * region[3],
998 cl_uint num_events_in_wait_list,
999 const cl_event * event_wait_list,
1001 (command_queue, image, fill_color, origin, region,
1002 num_events_in_wait_list, event_wait_list, event))
1004 OCL_FUNC(cl_int, clEnqueueCopyImage,
1005 (cl_command_queue command_queue,
1008 const size_t * src_origin[3],
1009 const size_t * dst_origin[3],
1010 const size_t * region[3],
1011 cl_uint num_events_in_wait_list,
1012 const cl_event * event_wait_list,
1014 (command_queue, src_image, dst_image, src_origin, dst_origin,
1015 region, num_events_in_wait_list, event_wait_list, event))
1017 OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
1018 (cl_command_queue command_queue,
1021 const size_t * src_origin[3],
1022 const size_t * region[3],
1024 cl_uint num_events_in_wait_list,
1025 const cl_event * event_wait_list,
1027 (command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
1028 num_events_in_wait_list, event_wait_list, event))
1031 OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
1032 (cl_command_queue command_queue,
1036 const size_t dst_origin[3],
1037 const size_t region[3],
1038 cl_uint num_events_in_wait_list,
1039 const cl_event * event_wait_list,
1041 (command_queue, src_buffer, dst_image, src_offset, dst_origin,
1042 region, num_events_in_wait_list, event_wait_list, event))
1044 OCL_FUNC(cl_int, clFlush,
1045 (cl_command_queue command_queue),
1049 OCL_FUNC_P(void*, clEnqueueMapImage,
1050 (cl_command_queue command_queue,
1052 cl_bool blocking_map,
1053 cl_map_flags map_flags,
1054 const size_t * origin[3],
1055 const size_t * region[3],
1056 size_t * image_row_pitch,
1057 size_t * image_slice_pitch,
1058 cl_uint num_events_in_wait_list,
1059 const cl_event * event_wait_list,
1061 cl_int * errcode_ret),
1062 (command_queue, image, blocking_map, map_flags, origin, region,
1063 image_row_pitch, image_slice_pitch, num_events_in_wait_list,
1064 event_wait_list, event, errcode_ret))
1068 OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
1070 OCL_FUNC(cl_int, clGetKernelInfo,
1072 cl_kernel_info param_name,
1073 size_t param_value_size,
1075 size_t * param_value_size_ret),
1076 (kernel, param_name, param_value_size, param_value, param_value_size_ret))
1078 OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
1082 OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
1085 OCL_FUNC_P(cl_program, clCreateProgramWithSource,
1086 (cl_context context,
1088 const char ** strings,
1089 const size_t * lengths,
1090 cl_int * errcode_ret),
1091 (context, count, strings, lengths, errcode_ret))
1093 OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
1094 (cl_context context,
1095 cl_uint num_devices,
1096 const cl_device_id * device_list,
1097 const size_t * lengths,
1098 const unsigned char ** binaries,
1099 cl_int * binary_status,
1100 cl_int * errcode_ret),
1101 (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))
1103 OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))
1105 OCL_FUNC(cl_int, clBuildProgram,
1106 (cl_program program,
1107 cl_uint num_devices,
1108 const cl_device_id * device_list,
1109 const char * options,
1110 void (CL_CALLBACK * pfn_notify)(cl_program, void *),
1112 (program, num_devices, device_list, options, pfn_notify, user_data))
1114 OCL_FUNC(cl_int, clGetProgramInfo,
1115 (cl_program program,
1116 cl_program_info param_name,
1117 size_t param_value_size,
1119 size_t * param_value_size_ret),
1120 (program, param_name, param_value_size, param_value, param_value_size_ret))
1122 OCL_FUNC(cl_int, clGetProgramBuildInfo,
1123 (cl_program program,
1124 cl_device_id device,
1125 cl_program_build_info param_name,
1126 size_t param_value_size,
1128 size_t * param_value_size_ret),
1129 (program, device, param_name, param_value_size, param_value, param_value_size_ret))
1131 OCL_FUNC_P(cl_kernel, clCreateKernel,
1132 (cl_program program,
1133 const char * kernel_name,
1134 cl_int * errcode_ret),
1135 (program, kernel_name, errcode_ret))
1137 OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))
1139 OCL_FUNC(cl_int, clSetKernelArg,
1143 const void * arg_value),
1144 (kernel, arg_index, arg_size, arg_value))
1146 OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
1148 cl_device_id device,
1149 cl_kernel_work_group_info param_name,
1150 size_t param_value_size,
1152 size_t * param_value_size_ret),
1153 (kernel, device, param_name, param_value_size, param_value, param_value_size_ret))
1155 OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
1157 OCL_FUNC(cl_int, clEnqueueReadBuffer,
1158 (cl_command_queue command_queue,
1160 cl_bool blocking_read,
1164 cl_uint num_events_in_wait_list,
1165 const cl_event * event_wait_list,
1167 (command_queue, buffer, blocking_read, offset, size, ptr,
1168 num_events_in_wait_list, event_wait_list, event))
1170 OCL_FUNC(cl_int, clEnqueueReadBufferRect,
1171 (cl_command_queue command_queue,
1173 cl_bool blocking_read,
1174 const size_t * buffer_offset,
1175 const size_t * host_offset,
1176 const size_t * region,
1177 size_t buffer_row_pitch,
1178 size_t buffer_slice_pitch,
1179 size_t host_row_pitch,
1180 size_t host_slice_pitch,
1182 cl_uint num_events_in_wait_list,
1183 const cl_event * event_wait_list,
1185 (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch,
1186 buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
1187 event_wait_list, event))
1189 OCL_FUNC(cl_int, clEnqueueWriteBuffer,
1190 (cl_command_queue command_queue,
1192 cl_bool blocking_write,
1196 cl_uint num_events_in_wait_list,
1197 const cl_event * event_wait_list,
1199 (command_queue, buffer, blocking_write, offset, size, ptr,
1200 num_events_in_wait_list, event_wait_list, event))
1202 OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
1203 (cl_command_queue command_queue,
1205 cl_bool blocking_write,
1206 const size_t * buffer_offset,
1207 const size_t * host_offset,
1208 const size_t * region,
1209 size_t buffer_row_pitch,
1210 size_t buffer_slice_pitch,
1211 size_t host_row_pitch,
1212 size_t host_slice_pitch,
1214 cl_uint num_events_in_wait_list,
1215 const cl_event * event_wait_list,
1217 (command_queue, buffer, blocking_write, buffer_offset, host_offset,
1218 region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch,
1219 host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
1221 /*OCL_FUNC(cl_int, clEnqueueFillBuffer,
1222 (cl_command_queue command_queue,
1224 const void * pattern,
1225 size_t pattern_size,
1228 cl_uint num_events_in_wait_list,
1229 const cl_event * event_wait_list,
1231 (command_queue, buffer, pattern, pattern_size, offset, size,
1232 num_events_in_wait_list, event_wait_list, event))*/
1234 OCL_FUNC(cl_int, clEnqueueCopyBuffer,
1235 (cl_command_queue command_queue,
1241 cl_uint num_events_in_wait_list,
1242 const cl_event * event_wait_list,
1244 (command_queue, src_buffer, dst_buffer, src_offset, dst_offset,
1245 size, num_events_in_wait_list, event_wait_list, event))
1247 OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
1248 (cl_command_queue command_queue,
1251 const size_t * src_origin,
1252 const size_t * dst_origin,
1253 const size_t * region,
1254 size_t src_row_pitch,
1255 size_t src_slice_pitch,
1256 size_t dst_row_pitch,
1257 size_t dst_slice_pitch,
1258 cl_uint num_events_in_wait_list,
1259 const cl_event * event_wait_list,
1261 (command_queue, src_buffer, dst_buffer, src_origin, dst_origin,
1262 region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
1263 num_events_in_wait_list, event_wait_list, event))
1265 OCL_FUNC_P(void*, clEnqueueMapBuffer,
1266 (cl_command_queue command_queue,
1268 cl_bool blocking_map,
1269 cl_map_flags map_flags,
1272 cl_uint num_events_in_wait_list,
1273 const cl_event * event_wait_list,
1275 cl_int * errcode_ret),
1276 (command_queue, buffer, blocking_map, map_flags, offset, size,
1277 num_events_in_wait_list, event_wait_list, event, errcode_ret))
1279 OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
1280 (cl_command_queue command_queue,
1283 cl_uint num_events_in_wait_list,
1284 const cl_event * event_wait_list,
1286 (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))
1288 OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
1289 (cl_command_queue command_queue,
1292 const size_t * global_work_offset,
1293 const size_t * global_work_size,
1294 const size_t * local_work_size,
1295 cl_uint num_events_in_wait_list,
1296 const cl_event * event_wait_list,
1298 (command_queue, kernel, work_dim, global_work_offset, global_work_size,
1299 local_work_size, num_events_in_wait_list, event_wait_list, event))
1301 OCL_FUNC(cl_int, clEnqueueTask,
1302 (cl_command_queue command_queue,
1304 cl_uint num_events_in_wait_list,
1305 const cl_event * event_wait_list,
1307 (command_queue, kernel, num_events_in_wait_list, event_wait_list, event))
1309 OCL_FUNC(cl_int, clSetEventCallback,
1311 cl_int command_exec_callback_type ,
1312 void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data),
1314 (event, command_exec_callback_type, pfn_event_notify, user_data))
1316 OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
1322 #ifndef CL_VERSION_1_2
1323 #define CL_VERSION_1_2
1329 #define CV_OclDbgAssert CV_DbgAssert
1331 static bool isRaiseError()
1333 static bool initialized = false;
1334 static bool value = false;
1337 value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false);
1342 #define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
1345 namespace cv { namespace ocl {
1349 UMat2D(const UMat& m)
1351 offset = (int)m.offset;
1364 UMat3D(const UMat& m)
1366 offset = (int)m.offset;
1367 step = (int)m.step.p[1];
1368 slicestep = (int)m.step.p[0];
1369 slices = (int)m.size.p[0];
1381 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
1382 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
1384 static uint64 table[256];
1385 static bool initialized = false;
1389 for( int i = 0; i < 256; i++ )
1392 for( int j = 0; j < 8; j++ )
1393 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
1400 for( size_t idx = 0; idx < size; idx++ )
1401 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
1408 typedef uint64 part;
1409 HashKey(part _a, part _b) : a(_a), b(_b) {}
1413 inline bool operator == (const HashKey& h1, const HashKey& h2)
1415 return h1.a == h2.a && h1.b == h2.b;
1418 inline bool operator < (const HashKey& h1, const HashKey& h2)
1420 return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
1427 static bool g_isOpenCLInitialized = false;
1428 static bool g_isOpenCLAvailable = false;
1430 if (!g_isOpenCLInitialized)
1435 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1439 g_isOpenCLAvailable = false;
1441 g_isOpenCLInitialized = true;
1443 return g_isOpenCLAvailable;
1451 CoreTLSData* data = coreTlsData.get();
1452 if( data->useOpenCL < 0 )
1456 data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available();
1460 data->useOpenCL = 0;
1463 return data->useOpenCL > 0;
1466 void setUseOpenCL(bool flag)
1470 CoreTLSData* data = coreTlsData.get();
1471 data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0;
1475 #ifdef HAVE_CLAMDBLAS
1480 static AmdBlasHelper & getInstance()
1482 static AmdBlasHelper amdBlas;
1486 bool isAvailable() const
1488 return g_isAmdBlasAvailable;
1495 clAmdBlasTeardown();
1503 if (!g_isAmdBlasInitialized)
1507 if (!g_isAmdBlasInitialized && haveOpenCL())
1511 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
1515 g_isAmdBlasAvailable = false;
1519 g_isAmdBlasAvailable = false;
1521 g_isAmdBlasInitialized = true;
1527 static bool g_isAmdBlasInitialized;
1528 static bool g_isAmdBlasAvailable;
1531 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1532 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1533 Mutex AmdBlasHelper::m;
1537 return AmdBlasHelper::getInstance().isAvailable();
1549 #ifdef HAVE_CLAMDFFT
1554 static AmdFftHelper & getInstance()
1556 static AmdFftHelper amdFft;
1560 bool isAvailable() const
1562 return g_isAmdFftAvailable;
1569 // clAmdFftTeardown();
1577 if (!g_isAmdFftInitialized)
1581 if (!g_isAmdFftInitialized && haveOpenCL())
1585 cl_uint major, minor, patch;
1586 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1588 // it throws exception in case AmdFft binaries are not found
1589 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1590 g_isAmdFftAvailable = true;
1592 catch (const Exception &)
1594 g_isAmdFftAvailable = false;
1598 g_isAmdFftAvailable = false;
1600 g_isAmdFftInitialized = true;
1605 static clAmdFftSetupData setupData;
1607 static bool g_isAmdFftInitialized;
1608 static bool g_isAmdFftAvailable;
1611 clAmdFftSetupData AmdFftHelper::setupData;
1612 bool AmdFftHelper::g_isAmdFftAvailable = false;
1613 bool AmdFftHelper::g_isAmdFftInitialized = false;
1614 Mutex AmdFftHelper::m;
1618 return AmdFftHelper::getInstance().isAvailable();
1632 Queue::getDefault().finish();
1635 #define IMPLEMENT_REFCOUNTABLE() \
1636 void addref() { CV_XADD(&refcount, 1); } \
1637 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
1640 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1642 struct Platform::Impl
1648 initialized = false;
1657 //cl_uint num_entries
1659 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1665 CV_OclDbgAssert(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len) == CL_SUCCESS);
1667 vendor = String(buf);
1674 IMPLEMENT_REFCOUNTABLE();
1676 cl_platform_id handle;
1681 Platform::Platform()
1686 Platform::~Platform()
1692 Platform::Platform(const Platform& pl)
1699 Platform& Platform::operator = (const Platform& pl)
1701 Impl* newp = (Impl*)pl.p;
1710 void* Platform::ptr() const
1712 return p ? p->handle : 0;
1715 Platform& Platform::getDefault()
1726 /////////////////////////////////////// Device ////////////////////////////////////////////
1728 // deviceVersion has format
1729 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1731 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1732 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1733 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1736 if (10 >= deviceVersion.length())
1738 const char *pstr = deviceVersion.c_str();
1739 if (0 != strncmp(pstr, "OpenCL ", 7))
1741 size_t ppos = deviceVersion.find('.', 7);
1742 if (String::npos == ppos)
1744 String temp = deviceVersion.substr(7, ppos - 7);
1745 major = atoi(temp.c_str());
1746 temp = deviceVersion.substr(ppos + 1);
1747 minor = atoi(temp.c_str());
1754 handle = (cl_device_id)d;
1757 name_ = getStrProp(CL_DEVICE_NAME);
1758 version_ = getStrProp(CL_DEVICE_VERSION);
1759 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1760 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1761 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1762 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1763 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1764 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1766 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1767 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1769 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1770 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1771 vendorName_ == "AMD")
1772 vendorID_ = VENDOR_AMD;
1773 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1774 vendorID_ = VENDOR_INTEL;
1775 else if (vendorName_ == "NVIDIA Corporation")
1776 vendorID_ = VENDOR_NVIDIA;
1778 vendorID_ = UNKNOWN_VENDOR;
1781 template<typename _TpCL, typename _TpOut>
1782 _TpOut getProp(cl_device_info prop) const
1787 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1788 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1791 bool getBoolProp(cl_device_info prop) const
1793 cl_bool temp = CL_FALSE;
1796 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1797 sz == sizeof(temp) ? temp != 0 : false;
1800 String getStrProp(cl_device_info prop) const
1804 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1805 sz < sizeof(buf) ? String(buf) : String();
1808 IMPLEMENT_REFCOUNTABLE();
1809 cl_device_id handle;
1813 int doubleFPConfig_;
1814 bool hostUnifiedMemory_;
1815 int maxComputeUnits_;
1816 size_t maxWorkGroupSize_;
1818 int deviceVersionMajor_;
1819 int deviceVersionMinor_;
1820 String driverVersion_;
1831 Device::Device(void* d)
1837 Device::Device(const Device& d)
1844 Device& Device::operator = (const Device& d)
1846 Impl* newp = (Impl*)d.p;
1861 void Device::set(void* d)
1868 void* Device::ptr() const
1870 return p ? p->handle : 0;
1873 String Device::name() const
1874 { return p ? p->name_ : String(); }
1876 String Device::extensions() const
1877 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1879 String Device::version() const
1880 { return p ? p->version_ : String(); }
1882 String Device::vendorName() const
1883 { return p ? p->vendorName_ : String(); }
1885 int Device::vendorID() const
1886 { return p ? p->vendorID_ : 0; }
1888 String Device::OpenCL_C_Version() const
1889 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1891 String Device::OpenCLVersion() const
1892 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1894 int Device::deviceVersionMajor() const
1895 { return p ? p->deviceVersionMajor_ : 0; }
1897 int Device::deviceVersionMinor() const
1898 { return p ? p->deviceVersionMinor_ : 0; }
1900 String Device::driverVersion() const
1901 { return p ? p->driverVersion_ : String(); }
1903 int Device::type() const
1904 { return p ? p->type_ : 0; }
1906 int Device::addressBits() const
1907 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
1909 bool Device::available() const
1910 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1912 bool Device::compilerAvailable() const
1913 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1915 bool Device::linkerAvailable() const
1916 #ifdef CL_VERSION_1_2
1917 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1919 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1922 int Device::doubleFPConfig() const
1923 { return p ? p->doubleFPConfig_ : 0; }
1925 int Device::singleFPConfig() const
1926 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1928 int Device::halfFPConfig() const
1929 #ifdef CL_VERSION_1_2
1930 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1932 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1935 bool Device::endianLittle() const
1936 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1938 bool Device::errorCorrectionSupport() const
1939 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1941 int Device::executionCapabilities() const
1942 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1944 size_t Device::globalMemCacheSize() const
1945 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1947 int Device::globalMemCacheType() const
1948 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1950 int Device::globalMemCacheLineSize() const
1951 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1953 size_t Device::globalMemSize() const
1954 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1956 size_t Device::localMemSize() const
1957 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1959 int Device::localMemType() const
1960 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1962 bool Device::hostUnifiedMemory() const
1963 { return p ? p->hostUnifiedMemory_ : false; }
1965 bool Device::imageSupport() const
1966 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1968 bool Device::imageFromBufferSupport() const
1973 size_t pos = p->getStrProp(CL_DEVICE_EXTENSIONS).find("cl_khr_image2d_from_buffer");
1974 if (pos != String::npos)
1982 uint Device::imagePitchAlignment() const
1984 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1985 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1991 uint Device::imageBaseAddressAlignment() const
1993 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1994 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
2000 size_t Device::image2DMaxWidth() const
2001 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
2003 size_t Device::image2DMaxHeight() const
2004 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
2006 size_t Device::image3DMaxWidth() const
2007 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
2009 size_t Device::image3DMaxHeight() const
2010 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
2012 size_t Device::image3DMaxDepth() const
2013 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
2015 size_t Device::imageMaxBufferSize() const
2016 #ifdef CL_VERSION_1_2
2017 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
2019 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2022 size_t Device::imageMaxArraySize() const
2023 #ifdef CL_VERSION_1_2
2024 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
2026 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2029 int Device::maxClockFrequency() const
2030 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
2032 int Device::maxComputeUnits() const
2033 { return p ? p->maxComputeUnits_ : 0; }
2035 int Device::maxConstantArgs() const
2036 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
2038 size_t Device::maxConstantBufferSize() const
2039 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
2041 size_t Device::maxMemAllocSize() const
2042 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
2044 size_t Device::maxParameterSize() const
2045 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
2047 int Device::maxReadImageArgs() const
2048 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
2050 int Device::maxWriteImageArgs() const
2051 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
2053 int Device::maxSamplers() const
2054 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
2056 size_t Device::maxWorkGroupSize() const
2057 { return p ? p->maxWorkGroupSize_ : 0; }
2059 int Device::maxWorkItemDims() const
2060 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
2062 void Device::maxWorkItemSizes(size_t* sizes) const
2066 const int MAX_DIMS = 32;
2068 CV_OclDbgAssert(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
2069 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz) == CL_SUCCESS);
2073 int Device::memBaseAddrAlign() const
2074 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
2076 int Device::nativeVectorWidthChar() const
2077 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
2079 int Device::nativeVectorWidthShort() const
2080 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
2082 int Device::nativeVectorWidthInt() const
2083 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
2085 int Device::nativeVectorWidthLong() const
2086 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
2088 int Device::nativeVectorWidthFloat() const
2089 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
2091 int Device::nativeVectorWidthDouble() const
2092 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
2094 int Device::nativeVectorWidthHalf() const
2095 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
2097 int Device::preferredVectorWidthChar() const
2098 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
2100 int Device::preferredVectorWidthShort() const
2101 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
2103 int Device::preferredVectorWidthInt() const
2104 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
2106 int Device::preferredVectorWidthLong() const
2107 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
2109 int Device::preferredVectorWidthFloat() const
2110 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
2112 int Device::preferredVectorWidthDouble() const
2113 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
2115 int Device::preferredVectorWidthHalf() const
2116 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
2118 size_t Device::printfBufferSize() const
2119 #ifdef CL_VERSION_1_2
2120 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
2122 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2126 size_t Device::profilingTimerResolution() const
2127 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
2129 const Device& Device::getDefault()
2131 const Context& ctx = Context::getDefault();
2132 int idx = coreTlsData.get()->device;
2133 const Device& device = ctx.device(idx);
2137 ////////////////////////////////////// Context ///////////////////////////////////////////////////
2139 template <typename Functor, typename ObjectType>
2140 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2143 cl_int err = f(obj, name, 0, NULL, &required);
2144 if (err != CL_SUCCESS)
2150 AutoBuffer<char> buf(required + 1);
2151 char* ptr = (char*)buf; // cleanup is not needed
2152 err = f(obj, name, required, ptr, NULL);
2153 if (err != CL_SUCCESS)
2161 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
2166 std::istringstream ss(s);
2170 std::getline(ss, item, delim);
2171 elems.push_back(item);
2175 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2177 // Sample: AMD:GPU:Tahiti
2178 // Sample: :GPU|CPU: = '' = ':' = '::'
2179 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2180 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2182 std::vector<std::string> parts;
2183 split(configurationStr, ':', parts);
2184 if (parts.size() > 3)
2186 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
2189 if (parts.size() > 2)
2190 deviceNameOrID = parts[2];
2191 if (parts.size() > 1)
2193 split(parts[1], '|', deviceTypes);
2195 if (parts.size() > 0)
2197 platform = parts[0];
2203 static cl_device_id selectOpenCLDevice()
2208 static cl_device_id selectOpenCLDevice()
2210 std::string platform, deviceName;
2211 std::vector<std::string> deviceTypes;
2213 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
2214 if (configuration &&
2215 (strcmp(configuration, "disabled") == 0 ||
2216 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
2222 if (deviceName.length() == 1)
2223 // We limit ID range to 0..9, because we want to write:
2224 // - '2500' to mean i5-2500
2225 // - '8350' to mean AMD FX-8350
2226 // - '650' to mean GeForce 650
2227 // To extend ID range change condition to '> 0'
2230 for (size_t i = 0; i < deviceName.length(); i++)
2232 if (!isdigit(deviceName[i]))
2240 deviceID = atoi(deviceName.c_str());
2246 std::vector<cl_platform_id> platforms;
2248 cl_uint numPlatforms = 0;
2249 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
2251 if (numPlatforms == 0)
2253 platforms.resize((size_t)numPlatforms);
2254 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
2255 platforms.resize(numPlatforms);
2258 int selectedPlatform = -1;
2259 if (platform.length() > 0)
2261 for (size_t i = 0; i < platforms.size(); i++)
2264 CV_OclDbgAssert(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name) == CL_SUCCESS);
2265 if (name.find(platform) != std::string::npos)
2267 selectedPlatform = (int)i;
2271 if (selectedPlatform == -1)
2273 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
2277 if (deviceTypes.size() == 0)
2281 deviceTypes.push_back("GPU");
2283 deviceTypes.push_back("CPU");
2286 deviceTypes.push_back("ALL");
2288 for (size_t t = 0; t < deviceTypes.size(); t++)
2291 std::string tempStrDeviceType = deviceTypes[t];
2292 std::transform( tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), tolower );
2294 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2295 deviceType = Device::TYPE_GPU;
2296 else if (tempStrDeviceType == "cpu")
2297 deviceType = Device::TYPE_CPU;
2298 else if (tempStrDeviceType == "accelerator")
2299 deviceType = Device::TYPE_ACCELERATOR;
2300 else if (tempStrDeviceType == "all")
2301 deviceType = Device::TYPE_ALL;
2304 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
2308 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2309 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2310 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2314 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2315 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2318 size_t base = devices.size();
2319 devices.resize(base + count);
2320 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2321 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2324 for (size_t i = (isID ? deviceID : 0);
2325 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2329 CV_OclDbgAssert(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name) == CL_SUCCESS);
2330 cl_bool useGPU = true;
2331 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2333 cl_bool isIGPU = CL_FALSE;
2334 clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL);
2335 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2337 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2339 // TODO check for OpenCL 1.1
2347 return NULL; // suppress messages on stderr
2349 std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
2350 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2351 << " Device types: ";
2352 for (size_t t = 0; t < deviceTypes.size(); t++)
2353 std::cerr << deviceTypes[t] << " ";
2355 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
2360 struct Context::Impl
2370 CV_Assert(handle == NULL);
2372 cl_device_id d = selectOpenCLDevice();
2377 cl_platform_id pl = NULL;
2378 CV_OclDbgAssert(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL) == CL_SUCCESS);
2380 cl_context_properties prop[] =
2382 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2386 // !!! in the current implementation force the number of devices to 1 !!!
2390 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2392 bool ok = handle != 0 && status == CL_SUCCESS;
2408 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
2409 cl_context_properties prop[] =
2411 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2415 cl_uint i, nd0 = 0, nd = 0;
2416 int dtype = dtype0 & 15;
2417 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, 0, 0, &nd0 ) == CL_SUCCESS);
2419 AutoBuffer<void*> dlistbuf(nd0*2+1);
2420 cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
2421 cl_device_id* dlist_new = dlist + nd0;
2422 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
2425 for(i = 0; i < nd0; i++)
2428 if( !d.available() || !d.compilerAvailable() )
2430 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
2432 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
2434 String name = d.name();
2435 if( nd != 0 && name != name0 )
2438 dlist_new[nd++] = dlist[i];
2444 // !!! in the current implementation force the number of devices to 1 !!!
2447 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
2448 bool ok = handle != 0 && retval == CL_SUCCESS;
2452 for( i = 0; i < nd; i++ )
2453 devices[i].set(dlist_new[i]);
2461 clReleaseContext(handle);
2467 Program getProg(const ProgramSource& src,
2468 const String& buildflags, String& errmsg)
2470 String prefix = Program::getPrefix(buildflags);
2471 HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
2472 phash_t::iterator it = phash.find(k);
2473 if( it != phash.end() )
2475 //String filename = format("%08x%08x_%08x%08x.clb2",
2476 Program prog(src, buildflags, errmsg);
2478 phash.insert(std::pair<HashKey,Program>(k, prog));
2482 IMPLEMENT_REFCOUNTABLE();
2485 std::vector<Device> devices;
2487 typedef ProgramSource::hash_t hash_t;
2491 HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
2492 bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
2493 bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
2494 bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
2497 typedef std::map<HashKey, Program> phash_t;
2507 Context::Context(int dtype)
2513 bool Context::create()
2528 bool Context::create(int dtype0)
2534 p = new Impl(dtype0);
2552 Context::Context(const Context& c)
2559 Context& Context::operator = (const Context& c)
2561 Impl* newp = (Impl*)c.p;
2570 void* Context::ptr() const
2572 return p == NULL ? NULL : p->handle;
2575 size_t Context::ndevices() const
2577 return p ? p->devices.size() : 0;
2580 const Device& Context::device(size_t idx) const
2582 static Device dummy;
2583 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2586 Context& Context::getDefault(bool initialize)
2588 static Context* ctx = new Context();
2589 if(!ctx->p && haveOpenCL())
2592 ctx->p = new Impl();
2595 // do not create new Context right away.
2596 // First, try to retrieve existing context of the same type.
2597 // In its turn, Platform::getContext() may call Context::create()
2598 // if there is no such context.
2599 if (ctx->p->handle == NULL)
2600 ctx->p->setDefault();
2607 Program Context::getProg(const ProgramSource& prog,
2608 const String& buildopts, String& errmsg)
2610 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2613 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
2615 cl_context context = (cl_context)_context;
2616 cl_device_id device = (cl_device_id)_device;
2618 // cleanup old context
2619 Context::Impl * impl = ctx.p;
2622 CV_OclDbgAssert(clReleaseContext(impl->handle) == CL_SUCCESS);
2624 impl->devices.clear();
2626 impl->handle = context;
2627 impl->devices.resize(1);
2628 impl->devices[0].set(device);
2630 Platform& p = Platform::getDefault();
2631 Platform::Impl* pImpl = p.p;
2632 pImpl->handle = (cl_platform_id)platform;
2635 /////////////////////////////////////////// Queue /////////////////////////////////////////////
2639 Impl(const Context& c, const Device& d)
2642 const Context* pc = &c;
2643 cl_context ch = (cl_context)pc->ptr();
2646 pc = &Context::getDefault();
2647 ch = (cl_context)pc->ptr();
2649 cl_device_id dh = (cl_device_id)d.ptr();
2651 dh = (cl_device_id)pc->device(0).ptr();
2653 handle = clCreateCommandQueue(ch, dh, 0, &retval);
2654 CV_OclDbgAssert(retval == CL_SUCCESS);
2660 if (!cv::__termination)
2666 clReleaseCommandQueue(handle);
2672 IMPLEMENT_REFCOUNTABLE();
2674 cl_command_queue handle;
2682 Queue::Queue(const Context& c, const Device& d)
2688 Queue::Queue(const Queue& q)
2695 Queue& Queue::operator = (const Queue& q)
2697 Impl* newp = (Impl*)q.p;
2712 bool Queue::create(const Context& c, const Device& d)
2717 return p->handle != 0;
2720 void Queue::finish()
2724 CV_OclDbgAssert(clFinish(p->handle) == CL_SUCCESS);
2728 void* Queue::ptr() const
2730 return p ? p->handle : 0;
2733 Queue& Queue::getDefault()
2735 Queue& q = coreTlsData.get()->oclQueue;
2736 if( !q.p && haveOpenCL() )
2737 q.create(Context::getDefault());
2741 static cl_command_queue getQueue(const Queue& q)
2743 cl_command_queue qq = (cl_command_queue)q.ptr();
2745 qq = (cl_command_queue)Queue::getDefault().ptr();
2749 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
2751 KernelArg::KernelArg()
2752 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
2756 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
2757 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
2761 KernelArg KernelArg::Constant(const Mat& m)
2763 CV_Assert(m.isContinuous());
2764 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
2767 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
2771 Impl(const char* kname, const Program& prog) :
2772 refcount(1), e(0), nu(0)
2774 cl_program ph = (cl_program)prog.ptr();
2777 clCreateKernel(ph, kname, &retval) : 0;
2778 CV_OclDbgAssert(retval == CL_SUCCESS);
2779 for( int i = 0; i < MAX_ARRS; i++ )
2781 haveTempDstUMats = false;
2786 for( int i = 0; i < MAX_ARRS; i++ )
2789 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
2790 u[i]->currAllocator->deallocate(u[i]);
2794 haveTempDstUMats = false;
2797 void addUMat(const UMat& m, bool dst)
2799 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
2801 CV_XADD(&m.u->urefcount, 1);
2803 if(dst && m.u->tempUMat())
2804 haveTempDstUMats = true;
2807 void addImage(const Image2D& image)
2809 images.push_back(image);
2816 if(e) { clReleaseEvent(e); e = 0; }
2823 clReleaseKernel(handle);
2826 IMPLEMENT_REFCOUNTABLE();
2830 enum { MAX_ARRS = 16 };
2831 UMatData* u[MAX_ARRS];
2833 std::list<Image2D> images;
2834 bool haveTempDstUMats;
2841 static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
2843 ((cv::ocl::Kernel::Impl*)p)->finit();
2848 namespace cv { namespace ocl {
2855 Kernel::Kernel(const char* kname, const Program& prog)
2858 create(kname, prog);
2861 Kernel::Kernel(const char* kname, const ProgramSource& src,
2862 const String& buildopts, String* errmsg)
2865 create(kname, src, buildopts, errmsg);
2868 Kernel::Kernel(const Kernel& k)
2875 Kernel& Kernel::operator = (const Kernel& k)
2877 Impl* newp = (Impl*)k.p;
2892 bool Kernel::create(const char* kname, const Program& prog)
2896 p = new Impl(kname, prog);
2902 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
2908 bool Kernel::create(const char* kname, const ProgramSource& src,
2909 const String& buildopts, String* errmsg)
2917 if( !errmsg ) errmsg = &tempmsg;
2918 const Program& prog = Context::getDefault().getProg(src, buildopts, *errmsg);
2919 return create(kname, prog);
2922 void* Kernel::ptr() const
2924 return p ? p->handle : 0;
2927 bool Kernel::empty() const
2932 int Kernel::set(int i, const void* value, size_t sz)
2934 if (!p || !p->handle)
2941 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
2942 CV_OclDbgAssert(retval == CL_SUCCESS);
2943 if (retval != CL_SUCCESS)
2948 int Kernel::set(int i, const Image2D& image2D)
2950 p->addImage(image2D);
2951 cl_mem h = (cl_mem)image2D.ptr();
2952 return set(i, &h, sizeof(h));
2955 int Kernel::set(int i, const UMat& m)
2957 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
2960 int Kernel::set(int i, const KernelArg& arg)
2962 if( !p || !p->handle )
2970 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
2971 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
2972 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
2973 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
2983 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h) == CL_SUCCESS);
2984 else if( arg.m->dims <= 2 )
2987 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
2988 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS);
2989 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS);
2992 if( !(arg.flags & KernelArg::NO_SIZE) )
2994 int cols = u2d.cols*arg.wscale/arg.iwscale;
2995 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows) == CL_SUCCESS);
2996 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols) == CL_SUCCESS);
3003 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
3004 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS);
3005 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS);
3006 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS);
3008 if( !(arg.flags & KernelArg::NO_SIZE) )
3010 int cols = u3d.cols*arg.wscale/arg.iwscale;
3011 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows) == CL_SUCCESS);
3012 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows) == CL_SUCCESS);
3013 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols) == CL_SUCCESS);
3017 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
3020 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj) == CL_SUCCESS);
3025 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3026 bool sync, const Queue& q)
3028 if(!p || !p->handle || p->e != 0)
3031 cl_command_queue qq = getQueue(q);
3032 size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1};
3034 CV_Assert(_globalsize != 0);
3035 for (int i = 0; i < dims; i++)
3037 size_t val = _localsize ? _localsize[i] :
3038 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3039 CV_Assert( val > 0 );
3040 total *= _globalsize[i];
3041 globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
3045 if( p->haveTempDstUMats )
3047 cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
3048 offset, globalsize, _localsize, 0, 0,
3050 #if CV_OPENCL_SHOW_RUN_ERRORS
3051 if (retval != CL_SUCCESS)
3053 printf("OpenCL program returns error: %d\n", retval);
3057 if( sync || retval != CL_SUCCESS )
3059 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3065 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3067 return retval == CL_SUCCESS;
3070 bool Kernel::runTask(bool sync, const Queue& q)
3072 if(!p || !p->handle || p->e != 0)
3075 cl_command_queue qq = getQueue(q);
3076 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
3077 if( sync || retval != CL_SUCCESS )
3079 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3085 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3087 return retval == CL_SUCCESS;
3091 size_t Kernel::workGroupSize() const
3093 if(!p || !p->handle)
3095 size_t val = 0, retsz = 0;
3096 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3097 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
3098 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3101 size_t Kernel::preferedWorkGroupSizeMultiple() const
3103 if(!p || !p->handle)
3105 size_t val = 0, retsz = 0;
3106 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3107 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
3108 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3111 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3113 if(!p || !p->handle || !wsz)
3116 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3117 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
3118 sizeof(wsz[0]*3), wsz, &retsz) == CL_SUCCESS;
3121 size_t Kernel::localMemSize() const
3123 if(!p || !p->handle)
3127 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3128 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
3129 sizeof(val), &val, &retsz) == CL_SUCCESS ? (size_t)val : 0;
3132 /////////////////////////////////////////// Program /////////////////////////////////////////////
3134 struct Program::Impl
3136 Impl(const ProgramSource& _src,
3137 const String& _buildflags, String& errmsg)
3140 const Context& ctx = Context::getDefault();
3142 buildflags = _buildflags;
3143 const String& srcstr = src.source();
3144 const char* srcptr = srcstr.c_str();
3145 size_t srclen = srcstr.size();
3148 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
3149 if( handle && retval == CL_SUCCESS )
3151 int i, n = (int)ctx.ndevices();
3152 AutoBuffer<void*> deviceListBuf(n+1);
3153 void** deviceList = deviceListBuf;
3154 for( i = 0; i < n; i++ )
3155 deviceList[i] = ctx.device(i).ptr();
3157 Device device = Device::getDefault();
3159 buildflags += " -D AMD_DEVICE";
3160 else if (device.isIntel())
3161 buildflags += " -D INTEL_DEVICE";
3163 retval = clBuildProgram(handle, n,
3164 (const cl_device_id*)deviceList,
3165 buildflags.c_str(), 0, 0);
3166 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
3167 if( retval != CL_SUCCESS )
3171 cl_int buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3172 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
3173 if (buildInfo_retval == CL_SUCCESS && retsz > 1)
3175 AutoBuffer<char> bufbuf(retsz + 16);
3177 buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3178 CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
3179 if (buildInfo_retval == CL_SUCCESS)
3181 // TODO It is useful to see kernel name & program file name also
3182 errmsg = String(buf);
3183 printf("OpenCL program build log: %s\n%s\n", buildflags.c_str(), errmsg.c_str());
3187 if (retval != CL_SUCCESS && handle)
3189 clReleaseProgram(handle);
3196 Impl(const String& _buf, const String& _buildflags)
3200 buildflags = _buildflags;
3203 String prefix0 = Program::getPrefix(buildflags);
3204 const Context& ctx = Context::getDefault();
3205 const Device& dev = Device::getDefault();
3206 const char* pos0 = _buf.c_str();
3207 const char* pos1 = strchr(pos0, '\n');
3210 const char* pos2 = strchr(pos1+1, '\n');
3213 const char* pos3 = strchr(pos2+1, '\n');
3216 size_t prefixlen = (pos3 - pos0)+1;
3217 String prefix(pos0, prefixlen);
3218 if( prefix != prefix0 )
3220 const uchar* bin = (uchar*)(pos3+1);
3221 void* devid = dev.ptr();
3222 size_t codelen = _buf.length() - prefixlen;
3223 cl_int binstatus = 0, retval = 0;
3224 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
3225 &codelen, &bin, &binstatus, &retval);
3226 CV_OclDbgAssert(retval == CL_SUCCESS);
3233 size_t progsz = 0, retsz = 0;
3234 String prefix = Program::getPrefix(buildflags);
3235 size_t prefixlen = prefix.length();
3236 if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) != CL_SUCCESS)
3238 AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
3239 uchar* buf = bufbuf;
3240 memcpy(buf, prefix.c_str(), prefixlen);
3242 if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) != CL_SUCCESS)
3244 buf[progsz] = (uchar)'\0';
3245 return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
3253 if (!cv::__termination)
3256 clReleaseProgram(handle);
3262 IMPLEMENT_REFCOUNTABLE();
3270 Program::Program() { p = 0; }
3272 Program::Program(const ProgramSource& src,
3273 const String& buildflags, String& errmsg)
3276 create(src, buildflags, errmsg);
3279 Program::Program(const Program& prog)
3286 Program& Program::operator = (const Program& prog)
3288 Impl* newp = (Impl*)prog.p;
3303 bool Program::create(const ProgramSource& src,
3304 const String& buildflags, String& errmsg)
3308 p = new Impl(src, buildflags, errmsg);
3317 const ProgramSource& Program::source() const
3319 static ProgramSource dummy;
3320 return p ? p->src : dummy;
3323 void* Program::ptr() const
3325 return p ? p->handle : 0;
3328 bool Program::read(const String& bin, const String& buildflags)
3332 p = new Impl(bin, buildflags);
3333 return p->handle != 0;
3336 bool Program::write(String& bin) const
3341 return !bin.empty();
3344 String Program::getPrefix() const
3348 return getPrefix(p->buildflags);
3351 String Program::getPrefix(const String& buildflags)
3353 const Context& ctx = Context::getDefault();
3354 const Device& dev = ctx.device(0);
3355 return format("name=%s\ndriver=%s\nbuildflags=%s\n",
3356 dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
3359 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3361 struct ProgramSource::Impl
3363 Impl(const char* _src)
3367 Impl(const String& _src)
3371 void init(const String& _src)
3375 h = crc64((uchar*)src.c_str(), src.size());
3378 IMPLEMENT_REFCOUNTABLE();
3380 ProgramSource::hash_t h;
3384 ProgramSource::ProgramSource()
3389 ProgramSource::ProgramSource(const char* prog)
3394 ProgramSource::ProgramSource(const String& prog)
3399 ProgramSource::~ProgramSource()
3405 ProgramSource::ProgramSource(const ProgramSource& prog)
3412 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
3414 Impl* newp = (Impl*)prog.p;
3423 const String& ProgramSource::source() const
3425 static String dummy;
3426 return p ? p->src : dummy;
3429 ProgramSource::hash_t ProgramSource::hash() const
3431 return p ? p->h : 0;
3434 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
3436 class OpenCLBufferPool
3439 ~OpenCLBufferPool() { }
3441 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;
3442 virtual void release(cl_mem handle, size_t capacity) = 0;
3445 class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
3456 size_t currentReservedSize;
3457 size_t maxReservedSize;
3459 std::list<BufferEntry> reservedEntries_; // LRU order
3462 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
3464 if (reservedEntries_.empty())
3466 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3467 std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
3468 BufferEntry result = {NULL, 0};
3469 size_t minDiff = (size_t)(-1);
3470 for (; i != reservedEntries_.end(); ++i)
3472 BufferEntry& e = *i;
3473 if (e.capacity_ >= size)
3475 size_t diff = e.capacity_ - size;
3476 if (diff < size / 8 && (result_pos == reservedEntries_.end() || diff < minDiff))
3486 if (result_pos != reservedEntries_.end())
3488 //CV_DbgAssert(result == *result_pos);
3489 reservedEntries_.erase(result_pos);
3491 currentReservedSize -= entry.capacity_;
3498 void _checkSizeOfReservedEntries()
3500 while (currentReservedSize > maxReservedSize)
3502 CV_DbgAssert(!reservedEntries_.empty());
3503 const BufferEntry& entry = reservedEntries_.back();
3504 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3505 currentReservedSize -= entry.capacity_;
3506 _releaseBufferEntry(entry);
3507 reservedEntries_.pop_back();
3511 inline size_t _allocationGranularity(size_t size)
3516 else if (size < 64*1024)
3518 else if (size < 1024*1024)
3520 else if (size < 16*1024*1024)
3526 void _allocateBufferEntry(BufferEntry& entry, size_t size)
3528 CV_DbgAssert(entry.clBuffer_ == NULL);
3529 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
3530 Context& ctx = Context::getDefault();
3531 cl_int retval = CL_SUCCESS;
3532 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
3533 CV_Assert(retval == CL_SUCCESS);
3534 CV_Assert(entry.clBuffer_ != NULL);
3535 if(retval == CL_SUCCESS)
3537 CV_IMPL_ADD(CV_IMPL_OCL);
3539 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
3540 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
3543 void _releaseBufferEntry(const BufferEntry& entry)
3545 CV_Assert(entry.capacity_ != 0);
3546 CV_Assert(entry.clBuffer_ != NULL);
3547 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
3548 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
3549 clReleaseMemObject(entry.clBuffer_);
3552 OpenCLBufferPoolImpl()
3553 : currentReservedSize(0), maxReservedSize(0)
3555 int poolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
3556 maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", poolSize);
3558 virtual ~OpenCLBufferPoolImpl()
3560 freeAllReservedBuffers();
3561 CV_Assert(reservedEntries_.empty());
3564 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
3566 BufferEntry entry = {NULL, 0};
3567 if (maxReservedSize > 0)
3569 AutoLock locker(mutex_);
3570 if (_findAndRemoveEntryFromReservedList(entry, size))
3572 CV_DbgAssert(size <= entry.capacity_);
3573 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
3574 capacity = entry.capacity_;
3575 return entry.clBuffer_;
3578 _allocateBufferEntry(entry, size);
3579 capacity = entry.capacity_;
3580 return entry.clBuffer_;
3582 virtual void release(cl_mem handle, size_t capacity)
3584 BufferEntry entry = {handle, capacity};
3585 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
3587 _releaseBufferEntry(entry);
3591 AutoLock locker(mutex_);
3592 reservedEntries_.push_front(entry);
3593 currentReservedSize += entry.capacity_;
3594 _checkSizeOfReservedEntries();
3598 virtual size_t getReservedSize() const { return currentReservedSize; }
3599 virtual size_t getMaxReservedSize() const { return maxReservedSize; }
3600 virtual void setMaxReservedSize(size_t size)
3602 AutoLock locker(mutex_);
3603 size_t oldMaxReservedSize = maxReservedSize;
3604 maxReservedSize = size;
3605 if (maxReservedSize < oldMaxReservedSize)
3607 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3608 for (; i != reservedEntries_.end();)
3610 const BufferEntry& entry = *i;
3611 if (entry.capacity_ > maxReservedSize / 8)
3613 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3614 currentReservedSize -= entry.capacity_;
3615 _releaseBufferEntry(entry);
3616 i = reservedEntries_.erase(i);
3621 _checkSizeOfReservedEntries();
3624 virtual void freeAllReservedBuffers()
3626 AutoLock locker(mutex_);
3627 std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
3628 for (; i != reservedEntries_.end(); ++i)
3630 const BufferEntry& entry = *i;
3631 _releaseBufferEntry(entry);
3633 reservedEntries_.clear();
3637 #if defined _MSC_VER
3638 #pragma warning(disable:4127) // conditional expression is constant
3640 template <bool readAccess, bool writeAccess>
3641 class AlignedDataPtr
3645 uchar* const originPtr_;
3646 const size_t alignment_;
3648 uchar* allocatedPtr_;
3651 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
3652 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
3654 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
3655 if (((size_t)ptr_ & (alignment - 1)) != 0)
3657 allocatedPtr_ = new uchar[size_ + alignment - 1];
3658 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
3661 memcpy(ptr_, originPtr_, size_);
3666 uchar* getAlignedPtr() const
3668 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
3678 memcpy(originPtr_, ptr_, size_);
3680 delete[] allocatedPtr_;
3681 allocatedPtr_ = NULL;
3686 AlignedDataPtr(const AlignedDataPtr&); // disabled
3687 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
3689 #if defined _MSC_VER
3690 #pragma warning(default:4127) // conditional expression is constant
3693 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
3694 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
3697 class OpenCLAllocator : public MatAllocator
3699 mutable OpenCLBufferPoolImpl bufferPool;
3702 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0
3705 OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
3707 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
3708 int flags, UMatUsageFlags usageFlags) const
3710 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
3714 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
3716 const Device& dev = ctx.device(0);
3718 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
3719 createFlags |= CL_MEM_ALLOC_HOST_PTR;
3721 if( dev.hostUnifiedMemory() )
3724 flags0 = UMatData::COPY_ON_MAP;
3727 UMatData* allocate(int dims, const int* sizes, int type,
3728 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
3731 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3732 CV_Assert(data == 0);
3733 size_t total = CV_ELEM_SIZE(type);
3734 for( int i = dims-1; i >= 0; i-- )
3741 Context& ctx = Context::getDefault();
3742 int createFlags = 0, flags0 = 0;
3743 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
3745 size_t capacity = 0;
3746 void* handle = NULL;
3747 int allocatorFlags = 0;
3748 if (createFlags == 0)
3750 handle = bufferPool.allocate(total, capacity);
3752 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3753 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
3759 handle = clCreateBuffer((cl_context)ctx.ptr(),
3760 CL_MEM_READ_WRITE|createFlags, total, 0, &retval);
3761 if( !handle || retval != CL_SUCCESS )
3762 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3763 CV_IMPL_ADD(CV_IMPL_OCL)
3765 UMatData* u = new UMatData(this);
3768 u->capacity = capacity;
3771 u->allocatorFlags_ = allocatorFlags;
3772 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
3776 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
3781 UMatDataAutoLock lock(u);
3785 CV_Assert(u->origdata != 0);
3786 Context& ctx = Context::getDefault();
3787 int createFlags = 0, flags0 = 0;
3788 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
3790 cl_context ctx_handle = (cl_context)ctx.ptr();
3792 int tempUMatFlags = UMatData::TEMP_UMAT;
3793 u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|CL_MEM_READ_WRITE,
3794 u->size, u->origdata, &retval);
3795 if((!u->handle || retval != CL_SUCCESS) && !(accessFlags & ACCESS_FAST))
3797 u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
3798 u->size, u->origdata, &retval);
3799 tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
3802 if(!u->handle || retval != CL_SUCCESS)
3804 u->prevAllocator = u->currAllocator;
3805 u->currAllocator = this;
3806 u->flags |= tempUMatFlags;
3808 if(accessFlags & ACCESS_WRITE)
3809 u->markHostCopyObsolete(true);
3813 /*void sync(UMatData* u) const
3815 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3816 UMatDataAutoLock lock(u);
3818 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
3820 if( u->tempCopiedUMat() )
3822 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3823 u->size, u->origdata, 0, 0, 0);
3828 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3829 (CL_MAP_READ | CL_MAP_WRITE),
3830 0, u->size, 0, 0, 0, &retval);
3831 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
3834 u->markHostCopyObsolete(false);
3836 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
3838 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3839 u->size, u->data, 0, 0, 0);
3843 void deallocate(UMatData* u) const
3848 CV_Assert(u->urefcount >= 0);
3849 CV_Assert(u->refcount >= 0);
3851 // TODO: !!! when we add Shared Virtual Memory Support,
3852 // this function (as well as the others) should be corrected
3853 CV_Assert(u->handle != 0 && u->urefcount == 0);
3856 // UMatDataAutoLock lock(u);
3857 if( u->hostCopyObsolete() && u->refcount > 0 )
3859 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3860 if( u->tempCopiedUMat() )
3862 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3863 CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3864 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
3869 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3870 (CL_MAP_READ | CL_MAP_WRITE),
3871 0, u->size, 0, 0, 0, &retval);
3872 CV_OclDbgAssert(retval == CL_SUCCESS);
3873 CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
3874 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3877 u->markHostCopyObsolete(false);
3878 clReleaseMemObject((cl_mem)u->handle);
3880 u->currAllocator = u->prevAllocator;
3881 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3883 u->data = u->origdata;
3884 if(u->refcount == 0)
3885 u->currAllocator->deallocate(u);
3889 CV_Assert(u->refcount == 0);
3890 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3895 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
3897 bufferPool.release((cl_mem)u->handle, u->capacity);
3901 clReleaseMemObject((cl_mem)u->handle);
3909 void map(UMatData* u, int accessFlags) const
3914 CV_Assert( u->handle != 0 );
3916 UMatDataAutoLock autolock(u);
3918 if(accessFlags & ACCESS_WRITE)
3919 u->markDeviceCopyObsolete(true);
3921 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3923 // FIXIT Workaround for UMat synchronization issue
3924 // if( u->refcount == 0 )
3926 if( !u->copyOnMap() )
3928 if (u->data) // FIXIT Workaround for UMat synchronization issue
3930 //CV_Assert(u->hostCopyObsolete() == false);
3933 // because there can be other map requests for the same UMat with different access flags,
3934 // we use the universal (read-write) access mode.
3936 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3937 (CL_MAP_READ | CL_MAP_WRITE),
3938 0, u->size, 0, 0, 0, &retval);
3939 if(u->data && retval == CL_SUCCESS)
3941 u->markHostCopyObsolete(false);
3942 u->markDeviceMemMapped(true);
3946 // if map failed, switch to copy-on-map mode for the particular buffer
3947 u->flags |= UMatData::COPY_ON_MAP;
3952 u->data = (uchar*)fastMalloc(u->size);
3953 u->markHostCopyObsolete(true);
3957 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
3959 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3960 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3961 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
3962 u->markHostCopyObsolete(false);
3966 void unmap(UMatData* u) const
3972 CV_Assert(u->handle != 0);
3974 UMatDataAutoLock autolock(u);
3976 // FIXIT Workaround for UMat synchronization issue
3980 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3982 if( !u->copyOnMap() && u->deviceMemMapped() )
3984 CV_Assert(u->data != NULL);
3985 u->markDeviceMemMapped(false);
3986 CV_Assert( (retval = clEnqueueUnmapMemObject(q,
3987 (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
3988 if (Device::getDefault().isAMD())
3990 // required for multithreaded applications (see stitching test)
3991 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3995 else if( u->copyOnMap() && u->deviceCopyObsolete() )
3997 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3998 CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3999 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
4001 u->markDeviceCopyObsolete(false);
4002 u->markHostCopyObsolete(false);
4005 bool checkContinuous(int dims, const size_t sz[],
4006 const size_t srcofs[], const size_t srcstep[],
4007 const size_t dstofs[], const size_t dststep[],
4008 size_t& total, size_t new_sz[],
4009 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
4010 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
4012 bool iscontinuous = true;
4013 srcrawofs = srcofs ? srcofs[dims-1] : 0;
4014 dstrawofs = dstofs ? dstofs[dims-1] : 0;
4016 for( int i = dims-2; i >= 0; i-- )
4018 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
4019 iscontinuous = false;
4022 srcrawofs += srcofs[i]*srcstep[i];
4024 dstrawofs += dstofs[i]*dststep[i];
4029 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
4032 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
4033 // we assume that new_... arrays are initialized by caller
4034 // with 0's, so there is no else branch
4037 new_srcofs[0] = srcofs[1];
4038 new_srcofs[1] = srcofs[0];
4044 new_dstofs[0] = dstofs[1];
4045 new_dstofs[1] = dstofs[0];
4049 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
4050 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
4054 // we could check for dims == 3 here,
4055 // but from user perspective this one is more informative
4056 CV_Assert(dims <= 3);
4057 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
4060 new_srcofs[0] = srcofs[2];
4061 new_srcofs[1] = srcofs[1];
4062 new_srcofs[2] = srcofs[0];
4067 new_dstofs[0] = dstofs[2];
4068 new_dstofs[1] = dstofs[1];
4069 new_dstofs[2] = dstofs[0];
4072 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
4073 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
4076 return iscontinuous;
4079 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
4080 const size_t srcofs[], const size_t srcstep[],
4081 const size_t dststep[]) const
4085 UMatDataAutoLock autolock(u);
4087 if( u->data && !u->hostCopyObsolete() )
4089 Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
4092 CV_Assert( u->handle != 0 );
4094 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4096 size_t total = 0, new_sz[] = {0, 0, 0};
4097 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4098 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4100 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
4102 srcrawofs, new_srcofs, new_srcstep,
4103 dstrawofs, new_dstofs, new_dststep);
4105 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4108 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
4109 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4113 CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4114 new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
4115 new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4119 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
4120 const size_t dstofs[], const size_t dststep[],
4121 const size_t srcstep[]) const
4126 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4127 CV_Assert(u->refcount == 0 || u->tempUMat());
4129 size_t total = 0, new_sz[] = {0, 0, 0};
4130 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4131 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4133 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
4135 srcrawofs, new_srcofs, new_srcstep,
4136 dstrawofs, new_dstofs, new_dststep);
4138 UMatDataAutoLock autolock(u);
4140 // if there is cached CPU copy of the GPU matrix,
4141 // we could use it as a destination.
4142 // we can do it in 2 cases:
4143 // 1. we overwrite the whole content
4144 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
4145 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
4147 Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
4148 u->markHostCopyObsolete(false);
4149 u->markDeviceCopyObsolete(true);
4153 CV_Assert( u->handle != 0 );
4154 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4156 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4159 CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
4160 CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) == CL_SUCCESS );
4164 CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4165 new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
4166 new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) == CL_SUCCESS );
4169 u->markHostCopyObsolete(true);
4170 u->markDeviceCopyObsolete(false);
4173 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
4174 const size_t srcofs[], const size_t srcstep[],
4175 const size_t dstofs[], const size_t dststep[], bool _sync) const
4180 size_t total = 0, new_sz[] = {0, 0, 0};
4181 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4182 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4184 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
4186 srcrawofs, new_srcofs, new_srcstep,
4187 dstrawofs, new_dstofs, new_dststep);
4189 UMatDataAutoLock src_autolock(src);
4190 UMatDataAutoLock dst_autolock(dst);
4192 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
4194 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
4197 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
4199 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
4200 dst->markHostCopyObsolete(false);
4201 dst->markDeviceCopyObsolete(true);
4205 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4206 CV_Assert(dst->refcount == 0);
4207 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4212 CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4213 srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS );
4217 CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4218 new_srcofs, new_dstofs, new_sz,
4219 new_srcstep[0], new_srcstep[1],
4220 new_dststep[0], new_dststep[1],
4221 0, 0, 0)) == CL_SUCCESS );
4223 if(retval == CL_SUCCESS)
4225 CV_IMPL_ADD(CV_IMPL_OCL)
4228 dst->markHostCopyObsolete(true);
4229 dst->markDeviceCopyObsolete(false);
4233 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
4237 BufferPoolController* getBufferPoolController() const { return &bufferPool; }
4239 MatAllocator* matStdAllocator;
4242 MatAllocator* getOpenCLAllocator()
4244 static MatAllocator * allocator = new OpenCLAllocator();
4248 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
4250 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
4252 cl_uint numDevices = 0;
4253 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4254 0, NULL, &numDevices) == CL_SUCCESS);
4256 if (numDevices == 0)
4262 devices.resize((size_t)numDevices);
4263 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4264 numDevices, &devices[0], &numDevices) == CL_SUCCESS);
4267 struct PlatformInfo::Impl
4272 handle = *(cl_platform_id*)id;
4273 getDevices(devices, handle);
4276 String getStrProp(cl_device_info prop) const
4280 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
4281 sz < sizeof(buf) ? String(buf) : String();
4284 IMPLEMENT_REFCOUNTABLE();
4285 std::vector<cl_device_id> devices;
4286 cl_platform_id handle;
4289 PlatformInfo::PlatformInfo()
4294 PlatformInfo::PlatformInfo(void* platform_id)
4296 p = new Impl(platform_id);
4299 PlatformInfo::~PlatformInfo()
4305 PlatformInfo::PlatformInfo(const PlatformInfo& i)
4312 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
4325 int PlatformInfo::deviceNumber() const
4327 return p ? (int)p->devices.size() : 0;
4330 void PlatformInfo::getDevice(Device& device, int d) const
4332 CV_Assert(p && d < (int)p->devices.size() );
4334 device.set(p->devices[d]);
4337 String PlatformInfo::name() const
4339 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
4342 String PlatformInfo::vendor() const
4344 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
4347 String PlatformInfo::version() const
4349 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
4352 static void getPlatforms(std::vector<cl_platform_id>& platforms)
4354 cl_uint numPlatforms = 0;
4355 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
4357 if (numPlatforms == 0)
4363 platforms.resize((size_t)numPlatforms);
4364 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
4367 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
4369 std::vector<cl_platform_id> platforms;
4370 getPlatforms(platforms);
4372 for (size_t i = 0; i < platforms.size(); i++)
4373 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
4376 const char* typeToStr(int type)
4378 static const char* tab[]=
4380 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4381 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4382 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4383 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4384 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4385 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
4386 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
4387 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4389 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4390 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4393 const char* memopTypeToStr(int type)
4395 static const char* tab[] =
4397 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4398 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4399 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4400 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4401 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4402 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4403 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
4404 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4406 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4407 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4410 const char* vecopTypeToStr(int type)
4412 static const char* tab[] =
4414 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
4415 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
4416 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
4417 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
4418 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4419 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4420 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
4421 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4423 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4424 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4427 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
4429 if( sdepth == ddepth )
4431 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
4432 if( ddepth >= CV_32F ||
4433 (ddepth == CV_32S && sdepth < CV_32S) ||
4434 (ddepth == CV_16S && sdepth <= CV_8S) ||
4435 (ddepth == CV_16U && sdepth == CV_8U))
4437 sprintf(buf, "convert_%s", typestr);
4439 else if( sdepth >= CV_32F )
4440 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
4442 sprintf(buf, "convert_%s_sat", typestr);
4447 template <typename T>
4448 static std::string kerToStr(const Mat & k)
4450 int width = k.cols - 1, depth = k.depth();
4451 const T * const data = k.ptr<T>();
4453 std::ostringstream stream;
4454 stream.precision(10);
4458 for (int i = 0; i < width; ++i)
4459 stream << "DIG(" << (int)data[i] << ")";
4460 stream << "DIG(" << (int)data[width] << ")";
4462 else if (depth == CV_32F)
4464 stream.setf(std::ios_base::showpoint);
4465 for (int i = 0; i < width; ++i)
4466 stream << "DIG(" << data[i] << "f)";
4467 stream << "DIG(" << data[width] << "f)";
4471 for (int i = 0; i < width; ++i)
4472 stream << "DIG(" << data[i] << ")";
4473 stream << "DIG(" << data[width] << ")";
4476 return stream.str();
4479 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
4481 Mat kernel = _kernel.getMat().reshape(1, 1);
4483 int depth = kernel.depth();
4487 if (ddepth != depth)
4488 kernel.convertTo(kernel, ddepth);
4490 typedef std::string (* func_t)(const Mat &);
4491 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
4492 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
4493 const func_t func = funcs[ddepth];
4494 CV_Assert(func != 0);
4496 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
4499 #define PROCESS_SRC(src) \
4504 CV_Assert(src.isMat() || src.isUMat()); \
4505 Size csize = src.size(); \
4506 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
4507 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
4508 if (cwidth < ckercn || ckercn <= 0) \
4510 cols.push_back(cwidth); \
4511 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
4513 offsets.push_back(src.offset()); \
4514 steps.push_back(src.step()); \
4515 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
4516 kercns.push_back(ckercn); \
4521 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
4522 InputArray src4, InputArray src5, InputArray src6,
4523 InputArray src7, InputArray src8, InputArray src9,
4524 OclVectorStrategy strat)
4526 const ocl::Device & d = ocl::Device::getDefault();
4528 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
4529 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
4530 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
4531 d.preferredVectorWidthDouble(), -1 };
4533 // if the device says don't use vectors
4534 if (vectorWidths[0] == 1)
4537 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
4538 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
4539 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
4542 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
4545 int checkOptimalVectorWidth(const int *vectorWidths,
4546 InputArray src1, InputArray src2, InputArray src3,
4547 InputArray src4, InputArray src5, InputArray src6,
4548 InputArray src7, InputArray src8, InputArray src9,
4549 OclVectorStrategy strat)
4551 CV_Assert(vectorWidths);
4553 int ref_type = src1.type();
4555 std::vector<size_t> offsets, steps, cols;
4556 std::vector<int> dividers, kercns;
4567 size_t size = offsets.size();
4569 for (size_t i = 0; i < size; ++i)
4570 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
4571 dividers[i] >>= 1, kercns[i] >>= 1;
4574 int kercn = *std::min_element(kercns.begin(), kercns.end());
4579 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
4580 InputArray src4, InputArray src5, InputArray src6,
4581 InputArray src7, InputArray src8, InputArray src9)
4583 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
4589 // TODO Make this as a method of OpenCL "BuildOptions" class
4590 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
4592 if (!buildOptions.empty())
4593 buildOptions += " ";
4594 int type = _m.type(), depth = CV_MAT_DEPTH(type);
4595 buildOptions += format(
4596 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
4597 name.c_str(), ocl::typeToStr(type),
4598 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
4599 name.c_str(), (int)CV_MAT_CN(type),
4600 name.c_str(), (int)CV_ELEM_SIZE(type),
4601 name.c_str(), (int)CV_ELEM_SIZE1(type),
4602 name.c_str(), (int)depth
4607 struct Image2D::Impl
4609 Impl(const UMat &src, bool norm, bool alias)
4613 init(src, norm, alias);
4619 clReleaseMemObject(handle);
4622 static cl_image_format getImageFormat(int depth, int cn, bool norm)
4624 cl_image_format format;
4625 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
4626 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
4627 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
4628 CL_SNORM_INT16, -1, -1, -1, -1 };
4629 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
4631 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
4632 int channelOrder = channelOrders[cn];
4633 format.image_channel_data_type = (cl_channel_type)channelType;
4634 format.image_channel_order = (cl_channel_order)channelOrder;
4638 static bool isFormatSupported(cl_image_format format)
4641 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
4643 cl_context context = (cl_context)Context::getDefault().ptr();
4644 // Figure out how many formats are supported by this context.
4645 cl_uint numFormats = 0;
4646 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4647 CL_MEM_OBJECT_IMAGE2D, numFormats,
4649 AutoBuffer<cl_image_format> formats(numFormats);
4650 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4651 CL_MEM_OBJECT_IMAGE2D, numFormats,
4653 CV_OclDbgAssert(err == CL_SUCCESS);
4654 for (cl_uint i = 0; i < numFormats; ++i)
4656 if (!memcmp(&formats[i], &format, sizeof(format)))
4664 void init(const UMat &src, bool norm, bool alias)
4667 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
4669 CV_Assert(!src.empty());
4670 CV_Assert(ocl::Device::getDefault().imageSupport());
4672 int err, depth = src.depth(), cn = src.channels();
4674 cl_image_format format = getImageFormat(depth, cn, norm);
4676 if (!isFormatSupported(format))
4677 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
4679 if (alias && !src.handle(ACCESS_RW))
4680 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
4682 cl_context context = (cl_context)Context::getDefault().ptr();
4683 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
4685 #ifdef CL_VERSION_1_2
4686 // this enables backwards portability to
4687 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
4688 const Device & d = ocl::Device::getDefault();
4689 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
4690 CV_Assert(!alias || canCreateAlias(src));
4691 if (1 < major || (1 == major && 2 <= minor))
4694 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
4695 desc.image_width = src.cols;
4696 desc.image_height = src.rows;
4697 desc.image_depth = 0;
4698 desc.image_array_size = 1;
4699 desc.image_row_pitch = alias ? src.step[0] : 0;
4700 desc.image_slice_pitch = 0;
4701 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
4702 desc.num_mip_levels = 0;
4703 desc.num_samples = 0;
4704 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
4709 CV_SUPPRESS_DEPRECATED_START
4710 CV_Assert(!alias); // This is an OpenCL 1.2 extension
4711 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
4712 CV_SUPPRESS_DEPRECATED_END
4714 CV_OclDbgAssert(err == CL_SUCCESS);
4716 size_t origin[] = { 0, 0, 0 };
4717 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
4720 if (!alias && !src.isContinuous())
4722 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
4723 CV_OclDbgAssert(err == CL_SUCCESS);
4725 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
4726 CV_Assert(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
4727 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL) == CL_SUCCESS);
4728 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4732 devData = (cl_mem)src.handle(ACCESS_READ);
4734 CV_Assert(devData != NULL);
4738 CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS);
4739 if (!src.isContinuous())
4741 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4742 CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS);
4747 IMPLEMENT_REFCOUNTABLE();
4757 Image2D::Image2D(const UMat &src, bool norm, bool alias)
4759 p = new Impl(src, norm, alias);
4762 bool Image2D::canCreateAlias(const UMat &m)
4765 const Device & d = ocl::Device::getDefault();
4766 if (d.imageFromBufferSupport() && !m.empty())
4768 // This is the required pitch alignment in pixels
4769 uint pitchAlign = d.imagePitchAlignment();
4770 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
4772 // We don't currently handle the case where the buffer was created
4773 // with CL_MEM_USE_HOST_PTR
4774 if (!m.u->tempUMat())
4783 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
4785 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
4787 return Impl::isFormatSupported(format);
4790 Image2D::Image2D(const Image2D & i)
4797 Image2D & Image2D::operator = (const Image2D & i)
4816 void* Image2D::ptr() const
4818 return p ? p->handle : 0;
4821 bool isPerformanceCheckBypassed()
4823 static bool initialized = false;
4824 static bool value = false;
4827 value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);