1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
47 #include <iostream> // std::cerr
49 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
51 #include "opencv2/core/bufferpool.hpp"
52 #ifndef LOG_BUFFER_POOL
54 # define LOG_BUFFER_POOL printf
56 # define LOG_BUFFER_POOL(...)
60 // TODO Move to some common place
61 static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
64 const char* envValue = NULL;
66 const char* envValue = getenv(name);
72 cv::String value = envValue;
74 for (; pos < value.size(); pos++)
76 if (!isdigit(value[pos]))
79 cv::String valueStr = value.substr(0, pos);
80 cv::String suffixStr = value.substr(pos, value.length() - pos);
81 int v = atoi(valueStr.c_str());
82 if (suffixStr.length() == 0)
84 else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
85 return v * 1024 * 1024;
86 else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
88 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
91 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
92 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
95 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
97 // TODO FIXIT: This file can't be build without OPENCL
100 Part of the file is an extract from the standard OpenCL headers from Khronos site.
101 Below is the original copyright.
104 /*******************************************************************************
105 * Copyright (c) 2008 - 2012 The Khronos Group Inc.
107 * Permission is hereby granted, free of charge, to any person obtaining a
108 * copy of this software and/or associated documentation files (the
109 * "Materials"), to deal in the Materials without restriction, including
110 * without limitation the rights to use, copy, modify, merge, publish,
111 * distribute, sublicense, and/or sell copies of the Materials, and to
112 * permit persons to whom the Materials are furnished to do so, subject to
113 * the following conditions:
115 * The above copyright notice and this permission notice shall be included
116 * in all copies or substantial portions of the Materials.
118 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
119 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
120 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
121 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
122 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
123 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
124 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
125 ******************************************************************************/
127 #if 0 //defined __APPLE__
128 #define HAVE_OPENCL 1
133 #define OPENCV_CL_NOT_IMPLEMENTED -1000
137 #if defined __APPLE__
138 #include <OpenCL/opencl.h>
140 #include <CL/opencl.h>
143 static const bool g_haveOpenCL = true;
149 struct _cl_platform_id { int dummy; };
150 struct _cl_device_id { int dummy; };
151 struct _cl_context { int dummy; };
152 struct _cl_command_queue { int dummy; };
153 struct _cl_mem { int dummy; };
154 struct _cl_program { int dummy; };
155 struct _cl_kernel { int dummy; };
156 struct _cl_event { int dummy; };
157 struct _cl_sampler { int dummy; };
159 typedef struct _cl_platform_id * cl_platform_id;
160 typedef struct _cl_device_id * cl_device_id;
161 typedef struct _cl_context * cl_context;
162 typedef struct _cl_command_queue * cl_command_queue;
163 typedef struct _cl_mem * cl_mem;
164 typedef struct _cl_program * cl_program;
165 typedef struct _cl_kernel * cl_kernel;
166 typedef struct _cl_event * cl_event;
167 typedef struct _cl_sampler * cl_sampler;
170 typedef unsigned cl_uint;
171 #if defined (_WIN32) && defined(_MSC_VER)
172 typedef __int64 cl_long;
173 typedef unsigned __int64 cl_ulong;
175 typedef long cl_long;
176 typedef unsigned long cl_ulong;
179 typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
180 typedef cl_ulong cl_bitfield;
181 typedef cl_bitfield cl_device_type;
182 typedef cl_uint cl_platform_info;
183 typedef cl_uint cl_device_info;
184 typedef cl_bitfield cl_device_fp_config;
185 typedef cl_uint cl_device_mem_cache_type;
186 typedef cl_uint cl_device_local_mem_type;
187 typedef cl_bitfield cl_device_exec_capabilities;
188 typedef cl_bitfield cl_command_queue_properties;
189 typedef intptr_t cl_device_partition_property;
190 typedef cl_bitfield cl_device_affinity_domain;
192 typedef intptr_t cl_context_properties;
193 typedef cl_uint cl_context_info;
194 typedef cl_uint cl_command_queue_info;
195 typedef cl_uint cl_channel_order;
196 typedef cl_uint cl_channel_type;
197 typedef cl_bitfield cl_mem_flags;
198 typedef cl_uint cl_mem_object_type;
199 typedef cl_uint cl_mem_info;
200 typedef cl_bitfield cl_mem_migration_flags;
201 typedef cl_uint cl_image_info;
202 typedef cl_uint cl_buffer_create_type;
203 typedef cl_uint cl_addressing_mode;
204 typedef cl_uint cl_filter_mode;
205 typedef cl_uint cl_sampler_info;
206 typedef cl_bitfield cl_map_flags;
207 typedef cl_uint cl_program_info;
208 typedef cl_uint cl_program_build_info;
209 typedef cl_uint cl_program_binary_type;
210 typedef cl_int cl_build_status;
211 typedef cl_uint cl_kernel_info;
212 typedef cl_uint cl_kernel_arg_info;
213 typedef cl_uint cl_kernel_arg_address_qualifier;
214 typedef cl_uint cl_kernel_arg_access_qualifier;
215 typedef cl_bitfield cl_kernel_arg_type_qualifier;
216 typedef cl_uint cl_kernel_work_group_info;
217 typedef cl_uint cl_event_info;
218 typedef cl_uint cl_command_type;
219 typedef cl_uint cl_profiling_info;
222 typedef struct _cl_image_format {
223 cl_channel_order image_channel_order;
224 cl_channel_type image_channel_data_type;
227 typedef struct _cl_image_desc {
228 cl_mem_object_type image_type;
232 size_t image_array_size;
233 size_t image_row_pitch;
234 size_t image_slice_pitch;
235 cl_uint num_mip_levels;
240 typedef struct _cl_buffer_region {
246 //////////////////////////////////////////////////////////
249 #define CL_DEVICE_NOT_FOUND -1
250 #define CL_DEVICE_NOT_AVAILABLE -2
251 #define CL_COMPILER_NOT_AVAILABLE -3
252 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
253 #define CL_OUT_OF_RESOURCES -5
254 #define CL_OUT_OF_HOST_MEMORY -6
255 #define CL_PROFILING_INFO_NOT_AVAILABLE -7
256 #define CL_MEM_COPY_OVERLAP -8
257 #define CL_IMAGE_FORMAT_MISMATCH -9
258 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
259 #define CL_BUILD_PROGRAM_FAILURE -11
260 #define CL_MAP_FAILURE -12
261 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
262 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
263 #define CL_COMPILE_PROGRAM_FAILURE -15
264 #define CL_LINKER_NOT_AVAILABLE -16
265 #define CL_LINK_PROGRAM_FAILURE -17
266 #define CL_DEVICE_PARTITION_FAILED -18
267 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
269 #define CL_INVALID_VALUE -30
270 #define CL_INVALID_DEVICE_TYPE -31
271 #define CL_INVALID_PLATFORM -32
272 #define CL_INVALID_DEVICE -33
273 #define CL_INVALID_CONTEXT -34
274 #define CL_INVALID_QUEUE_PROPERTIES -35
275 #define CL_INVALID_COMMAND_QUEUE -36
276 #define CL_INVALID_HOST_PTR -37
277 #define CL_INVALID_MEM_OBJECT -38
278 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
279 #define CL_INVALID_IMAGE_SIZE -40
280 #define CL_INVALID_SAMPLER -41
281 #define CL_INVALID_BINARY -42
282 #define CL_INVALID_BUILD_OPTIONS -43
283 #define CL_INVALID_PROGRAM -44
284 #define CL_INVALID_PROGRAM_EXECUTABLE -45
285 #define CL_INVALID_KERNEL_NAME -46
286 #define CL_INVALID_KERNEL_DEFINITION -47
287 #define CL_INVALID_KERNEL -48
288 #define CL_INVALID_ARG_INDEX -49
289 #define CL_INVALID_ARG_VALUE -50
290 #define CL_INVALID_ARG_SIZE -51
291 #define CL_INVALID_KERNEL_ARGS -52
292 #define CL_INVALID_WORK_DIMENSION -53
293 #define CL_INVALID_WORK_GROUP_SIZE -54
294 #define CL_INVALID_WORK_ITEM_SIZE -55
295 #define CL_INVALID_GLOBAL_OFFSET -56
296 #define CL_INVALID_EVENT_WAIT_LIST -57
297 #define CL_INVALID_EVENT -58
298 #define CL_INVALID_OPERATION -59
299 #define CL_INVALID_GL_OBJECT -60
300 #define CL_INVALID_BUFFER_SIZE -61
301 #define CL_INVALID_MIP_LEVEL -62
302 #define CL_INVALID_GLOBAL_WORK_SIZE -63
303 #define CL_INVALID_PROPERTY -64
304 #define CL_INVALID_IMAGE_DESCRIPTOR -65
305 #define CL_INVALID_COMPILER_OPTIONS -66
306 #define CL_INVALID_LINKER_OPTIONS -67
307 #define CL_INVALID_DEVICE_PARTITION_COUNT -68
309 /*#define CL_VERSION_1_0 1
310 #define CL_VERSION_1_1 1
311 #define CL_VERSION_1_2 1*/
315 #define CL_BLOCKING CL_TRUE
316 #define CL_NON_BLOCKING CL_FALSE
318 #define CL_PLATFORM_PROFILE 0x0900
319 #define CL_PLATFORM_VERSION 0x0901
320 #define CL_PLATFORM_NAME 0x0902
321 #define CL_PLATFORM_VENDOR 0x0903
322 #define CL_PLATFORM_EXTENSIONS 0x0904
324 #define CL_DEVICE_TYPE_DEFAULT (1 << 0)
325 #define CL_DEVICE_TYPE_CPU (1 << 1)
326 #define CL_DEVICE_TYPE_GPU (1 << 2)
327 #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
328 #define CL_DEVICE_TYPE_CUSTOM (1 << 4)
329 #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
330 #define CL_DEVICE_TYPE 0x1000
331 #define CL_DEVICE_VENDOR_ID 0x1001
332 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
333 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
334 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
335 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
336 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
337 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
338 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
339 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
340 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
341 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
342 #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
343 #define CL_DEVICE_ADDRESS_BITS 0x100D
344 #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
345 #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
346 #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
347 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
348 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
349 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
350 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
351 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
352 #define CL_DEVICE_IMAGE_SUPPORT 0x1016
353 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
354 #define CL_DEVICE_MAX_SAMPLERS 0x1018
355 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
356 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
357 #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
358 #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
359 #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
360 #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
361 #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
362 #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
363 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
364 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
365 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
366 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
367 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
368 #define CL_DEVICE_ENDIAN_LITTLE 0x1026
369 #define CL_DEVICE_AVAILABLE 0x1027
370 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028
371 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
372 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A
373 #define CL_DEVICE_NAME 0x102B
374 #define CL_DEVICE_VENDOR 0x102C
375 #define CL_DRIVER_VERSION 0x102D
376 #define CL_DEVICE_PROFILE 0x102E
377 #define CL_DEVICE_VERSION 0x102F
378 #define CL_DEVICE_EXTENSIONS 0x1030
379 #define CL_DEVICE_PLATFORM 0x1031
380 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
381 #define CL_DEVICE_HALF_FP_CONFIG 0x1033
382 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
383 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
384 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
385 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
386 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
387 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
388 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
389 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
390 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
391 #define CL_DEVICE_OPENCL_C_VERSION 0x103D
392 #define CL_DEVICE_LINKER_AVAILABLE 0x103E
393 #define CL_DEVICE_BUILT_IN_KERNELS 0x103F
394 #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
395 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
396 #define CL_DEVICE_PARENT_DEVICE 0x1042
397 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
398 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044
399 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
400 #define CL_DEVICE_PARTITION_TYPE 0x1046
401 #define CL_DEVICE_REFERENCE_COUNT 0x1047
402 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
403 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
404 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
405 #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
407 #define CL_FP_DENORM (1 << 0)
408 #define CL_FP_INF_NAN (1 << 1)
409 #define CL_FP_ROUND_TO_NEAREST (1 << 2)
410 #define CL_FP_ROUND_TO_ZERO (1 << 3)
411 #define CL_FP_ROUND_TO_INF (1 << 4)
412 #define CL_FP_FMA (1 << 5)
413 #define CL_FP_SOFT_FLOAT (1 << 6)
414 #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
417 #define CL_READ_ONLY_CACHE 0x1
418 #define CL_READ_WRITE_CACHE 0x2
420 #define CL_GLOBAL 0x2
421 #define CL_EXEC_KERNEL (1 << 0)
422 #define CL_EXEC_NATIVE_KERNEL (1 << 1)
423 #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
424 #define CL_QUEUE_PROFILING_ENABLE (1 << 1)
426 #define CL_CONTEXT_REFERENCE_COUNT 0x1080
427 #define CL_CONTEXT_DEVICES 0x1081
428 #define CL_CONTEXT_PROPERTIES 0x1082
429 #define CL_CONTEXT_NUM_DEVICES 0x1083
430 #define CL_CONTEXT_PLATFORM 0x1084
431 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
433 #define CL_DEVICE_PARTITION_EQUALLY 0x1086
434 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
435 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
436 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
437 #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
438 #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
439 #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
440 #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
441 #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
442 #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
443 #define CL_QUEUE_CONTEXT 0x1090
444 #define CL_QUEUE_DEVICE 0x1091
445 #define CL_QUEUE_REFERENCE_COUNT 0x1092
446 #define CL_QUEUE_PROPERTIES 0x1093
447 #define CL_MEM_READ_WRITE (1 << 0)
448 #define CL_MEM_WRITE_ONLY (1 << 1)
449 #define CL_MEM_READ_ONLY (1 << 2)
450 #define CL_MEM_USE_HOST_PTR (1 << 3)
451 #define CL_MEM_ALLOC_HOST_PTR (1 << 4)
452 #define CL_MEM_COPY_HOST_PTR (1 << 5)
454 #define CL_MEM_HOST_WRITE_ONLY (1 << 7)
455 #define CL_MEM_HOST_READ_ONLY (1 << 8)
456 #define CL_MEM_HOST_NO_ACCESS (1 << 9)
457 #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
458 #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
464 #define CL_RGB 0x10B4
465 #define CL_RGBA 0x10B5
466 #define CL_BGRA 0x10B6
467 #define CL_ARGB 0x10B7
468 #define CL_INTENSITY 0x10B8
469 #define CL_LUMINANCE 0x10B9
471 #define CL_RGx 0x10BB
472 #define CL_RGBx 0x10BC
473 #define CL_DEPTH 0x10BD
474 #define CL_DEPTH_STENCIL 0x10BE
476 #define CL_SNORM_INT8 0x10D0
477 #define CL_SNORM_INT16 0x10D1
478 #define CL_UNORM_INT8 0x10D2
479 #define CL_UNORM_INT16 0x10D3
480 #define CL_UNORM_SHORT_565 0x10D4
481 #define CL_UNORM_SHORT_555 0x10D5
482 #define CL_UNORM_INT_101010 0x10D6
483 #define CL_SIGNED_INT8 0x10D7
484 #define CL_SIGNED_INT16 0x10D8
485 #define CL_SIGNED_INT32 0x10D9
486 #define CL_UNSIGNED_INT8 0x10DA
487 #define CL_UNSIGNED_INT16 0x10DB
488 #define CL_UNSIGNED_INT32 0x10DC
489 #define CL_HALF_FLOAT 0x10DD
490 #define CL_FLOAT 0x10DE
491 #define CL_UNORM_INT24 0x10DF
493 #define CL_MEM_OBJECT_BUFFER 0x10F0
494 #define CL_MEM_OBJECT_IMAGE2D 0x10F1
495 #define CL_MEM_OBJECT_IMAGE3D 0x10F2
496 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
497 #define CL_MEM_OBJECT_IMAGE1D 0x10F4
498 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
499 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
501 #define CL_MEM_TYPE 0x1100
502 #define CL_MEM_FLAGS 0x1101
503 #define CL_MEM_SIZE 0x1102
504 #define CL_MEM_HOST_PTR 0x1103
505 #define CL_MEM_MAP_COUNT 0x1104
506 #define CL_MEM_REFERENCE_COUNT 0x1105
507 #define CL_MEM_CONTEXT 0x1106
508 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
509 #define CL_MEM_OFFSET 0x1108
511 #define CL_IMAGE_FORMAT 0x1110
512 #define CL_IMAGE_ELEMENT_SIZE 0x1111
513 #define CL_IMAGE_ROW_PITCH 0x1112
514 #define CL_IMAGE_SLICE_PITCH 0x1113
515 #define CL_IMAGE_WIDTH 0x1114
516 #define CL_IMAGE_HEIGHT 0x1115
517 #define CL_IMAGE_DEPTH 0x1116
518 #define CL_IMAGE_ARRAY_SIZE 0x1117
519 #define CL_IMAGE_BUFFER 0x1118
520 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119
521 #define CL_IMAGE_NUM_SAMPLES 0x111A
523 #define CL_ADDRESS_NONE 0x1130
524 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
525 #define CL_ADDRESS_CLAMP 0x1132
526 #define CL_ADDRESS_REPEAT 0x1133
527 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134
529 #define CL_FILTER_NEAREST 0x1140
530 #define CL_FILTER_LINEAR 0x1141
532 #define CL_SAMPLER_REFERENCE_COUNT 0x1150
533 #define CL_SAMPLER_CONTEXT 0x1151
534 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152
535 #define CL_SAMPLER_ADDRESSING_MODE 0x1153
536 #define CL_SAMPLER_FILTER_MODE 0x1154
538 #define CL_MAP_READ (1 << 0)
539 #define CL_MAP_WRITE (1 << 1)
540 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
542 #define CL_PROGRAM_REFERENCE_COUNT 0x1160
543 #define CL_PROGRAM_CONTEXT 0x1161
544 #define CL_PROGRAM_NUM_DEVICES 0x1162
545 #define CL_PROGRAM_DEVICES 0x1163
546 #define CL_PROGRAM_SOURCE 0x1164
547 #define CL_PROGRAM_BINARY_SIZES 0x1165
548 #define CL_PROGRAM_BINARIES 0x1166
549 #define CL_PROGRAM_NUM_KERNELS 0x1167
550 #define CL_PROGRAM_KERNEL_NAMES 0x1168
551 #define CL_PROGRAM_BUILD_STATUS 0x1181
552 #define CL_PROGRAM_BUILD_OPTIONS 0x1182
553 #define CL_PROGRAM_BUILD_LOG 0x1183
554 #define CL_PROGRAM_BINARY_TYPE 0x1184
555 #define CL_PROGRAM_BINARY_TYPE_NONE 0x0
556 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
557 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
558 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
560 #define CL_BUILD_SUCCESS 0
561 #define CL_BUILD_NONE -1
562 #define CL_BUILD_ERROR -2
563 #define CL_BUILD_IN_PROGRESS -3
565 #define CL_KERNEL_FUNCTION_NAME 0x1190
566 #define CL_KERNEL_NUM_ARGS 0x1191
567 #define CL_KERNEL_REFERENCE_COUNT 0x1192
568 #define CL_KERNEL_CONTEXT 0x1193
569 #define CL_KERNEL_PROGRAM 0x1194
570 #define CL_KERNEL_ATTRIBUTES 0x1195
571 #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
572 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
573 #define CL_KERNEL_ARG_TYPE_NAME 0x1198
574 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
575 #define CL_KERNEL_ARG_NAME 0x119A
576 #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
577 #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
578 #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
579 #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
580 #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
581 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
582 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
583 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
584 #define CL_KERNEL_ARG_TYPE_NONE 0
585 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
586 #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
587 #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
588 #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
589 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
590 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
591 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
592 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
593 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
595 #define CL_EVENT_COMMAND_QUEUE 0x11D0
596 #define CL_EVENT_COMMAND_TYPE 0x11D1
597 #define CL_EVENT_REFERENCE_COUNT 0x11D2
598 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
599 #define CL_EVENT_CONTEXT 0x11D4
601 #define CL_COMMAND_NDRANGE_KERNEL 0x11F0
602 #define CL_COMMAND_TASK 0x11F1
603 #define CL_COMMAND_NATIVE_KERNEL 0x11F2
604 #define CL_COMMAND_READ_BUFFER 0x11F3
605 #define CL_COMMAND_WRITE_BUFFER 0x11F4
606 #define CL_COMMAND_COPY_BUFFER 0x11F5
607 #define CL_COMMAND_READ_IMAGE 0x11F6
608 #define CL_COMMAND_WRITE_IMAGE 0x11F7
609 #define CL_COMMAND_COPY_IMAGE 0x11F8
610 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
611 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
612 #define CL_COMMAND_MAP_BUFFER 0x11FB
613 #define CL_COMMAND_MAP_IMAGE 0x11FC
614 #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
615 #define CL_COMMAND_MARKER 0x11FE
616 #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
617 #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
618 #define CL_COMMAND_READ_BUFFER_RECT 0x1201
619 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
620 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203
621 #define CL_COMMAND_USER 0x1204
622 #define CL_COMMAND_BARRIER 0x1205
623 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
624 #define CL_COMMAND_FILL_BUFFER 0x1207
625 #define CL_COMMAND_FILL_IMAGE 0x1208
627 #define CL_COMPLETE 0x0
628 #define CL_RUNNING 0x1
629 #define CL_SUBMITTED 0x2
630 #define CL_QUEUED 0x3
631 #define CL_BUFFER_CREATE_TYPE_REGION 0x1220
633 #define CL_PROFILING_COMMAND_QUEUED 0x1280
634 #define CL_PROFILING_COMMAND_SUBMIT 0x1281
635 #define CL_PROFILING_COMMAND_START 0x1282
636 #define CL_PROFILING_COMMAND_END 0x1283
638 #define CL_CALLBACK CV_STDCALL
640 static volatile bool g_haveOpenCL = false;
641 static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
643 #if defined(__APPLE__)
646 static void* initOpenCLAndLoad(const char* funcname)
648 static bool initialized = false;
649 static void* handle = 0;
654 const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
655 oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
656 "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
657 handle = dlopen(oclpath, RTLD_LAZY);
659 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
661 fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath);
663 fprintf(stderr, "Failed to load OpenCL runtime\n");
669 return funcname && handle ? dlsym(handle, funcname) : 0;
672 #elif defined WIN32 || defined _WIN32
674 #ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
675 #define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
678 #if (_WIN32_WINNT >= 0x0602)
679 #include <synchapi.h>
686 static void* initOpenCLAndLoad(const char* funcname)
688 static bool initialized = false;
689 static HMODULE handle = 0;
695 handle = LoadLibraryA("OpenCL.dll");
697 g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
704 return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
707 #elif defined(__linux)
712 static void* initOpenCLAndLoad(const char* funcname)
714 static bool initialized = false;
715 static void* handle = 0;
720 handle = dlopen("libOpenCL.so", RTLD_LAZY);
722 handle = dlopen("libCL.so", RTLD_LAZY);
724 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
730 return funcname ? (void*)dlsym(handle, funcname) : 0;
735 static void* initOpenCLAndLoad(const char*)
743 #define OCL_FUNC(rettype, funcname, argsdecl, args) \
744 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
745 static rettype funcname argsdecl \
747 static funcname##_t funcname##_p = 0; \
748 if( !funcname##_p ) \
750 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
751 if( !funcname##_p ) \
752 return OPENCV_CL_NOT_IMPLEMENTED; \
754 return funcname##_p args; \
758 #define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
759 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
760 static rettype funcname argsdecl \
762 static funcname##_t funcname##_p = 0; \
763 if( !funcname##_p ) \
765 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
766 if( !funcname##_p ) \
769 *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
773 return funcname##_p args; \
776 OCL_FUNC(cl_int, clGetPlatformIDs,
777 (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
778 (num_entries, platforms, num_platforms))
780 OCL_FUNC(cl_int, clGetPlatformInfo,
781 (cl_platform_id platform, cl_platform_info param_name,
782 size_t param_value_size, void * param_value,
783 size_t * param_value_size_ret),
784 (platform, param_name, param_value_size, param_value, param_value_size_ret))
786 OCL_FUNC(cl_int, clGetDeviceInfo,
787 (cl_device_id device,
788 cl_device_info param_name,
789 size_t param_value_size,
791 size_t * param_value_size_ret),
792 (device, param_name, param_value_size, param_value, param_value_size_ret))
795 OCL_FUNC(cl_int, clGetDeviceIDs,
796 (cl_platform_id platform,
797 cl_device_type device_type,
799 cl_device_id * devices,
800 cl_uint * num_devices),
801 (platform, device_type, num_entries, devices, num_devices))
803 OCL_FUNC_P(cl_context, clCreateContext,
804 (const cl_context_properties * properties,
806 const cl_device_id * devices,
807 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
809 cl_int * errcode_ret),
810 (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
812 OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
815 OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
817 OCL_FUNC_P(cl_context, clCreateContextFromType,
818 (const cl_context_properties * properties,
819 cl_device_type device_type,
820 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
822 cl_int * errcode_ret),
823 (properties, device_type, pfn_notify, user_data, errcode_ret))
825 OCL_FUNC(cl_int, clGetContextInfo,
827 cl_context_info param_name,
828 size_t param_value_size,
830 size_t * param_value_size_ret),
831 (context, param_name, param_value_size,
832 param_value, param_value_size_ret))
834 OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
837 cl_command_queue_properties properties,
838 cl_int * errcode_ret),
839 (context, device, properties, errcode_ret))
841 OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
843 OCL_FUNC_P(cl_mem, clCreateBuffer,
848 cl_int * errcode_ret),
849 (context, flags, size, host_ptr, errcode_ret))
852 OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
854 OCL_FUNC(cl_int, clGetCommandQueueInfo,
855 (cl_command_queue command_queue,
856 cl_command_queue_info param_name,
857 size_t param_value_size,
859 size_t * param_value_size_ret),
860 (command_queue, param_name, param_value_size, param_value, param_value_size_ret))
862 OCL_FUNC_P(cl_mem, clCreateSubBuffer,
865 cl_buffer_create_type buffer_create_type,
866 const void * buffer_create_info,
867 cl_int * errcode_ret),
868 (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
871 OCL_FUNC_P(cl_mem, clCreateImage,
874 const cl_image_format * image_format,
875 const cl_image_desc * image_desc,
877 cl_int * errcode_ret),
878 (context, flags, image_format, image_desc, host_ptr, errcode_ret))
880 OCL_FUNC_P(cl_mem, clCreateImage2D,
883 const cl_image_format * image_format,
886 size_t image_row_pitch,
888 cl_int *errcode_ret),
889 (context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret))
891 OCL_FUNC(cl_int, clGetSupportedImageFormats,
894 cl_mem_object_type image_type,
896 cl_image_format * image_formats,
897 cl_uint * num_image_formats),
898 (context, flags, image_type, num_entries, image_formats, num_image_formats))
901 OCL_FUNC(cl_int, clGetMemObjectInfo,
903 cl_mem_info param_name,
904 size_t param_value_size,
906 size_t * param_value_size_ret),
907 (memobj, param_name, param_value_size, param_value, param_value_size_ret))
909 OCL_FUNC(cl_int, clGetImageInfo,
911 cl_image_info param_name,
912 size_t param_value_size,
914 size_t * param_value_size_ret),
915 (image, param_name, param_value_size, param_value, param_value_size_ret))
917 OCL_FUNC(cl_int, clCreateKernelsInProgram,
921 cl_uint * num_kernels_ret),
922 (program, num_kernels, kernels, num_kernels_ret))
924 OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
926 OCL_FUNC(cl_int, clGetKernelArgInfo,
929 cl_kernel_arg_info param_name,
930 size_t param_value_size,
932 size_t * param_value_size_ret),
933 (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
935 OCL_FUNC(cl_int, clEnqueueReadImage,
936 (cl_command_queue command_queue,
938 cl_bool blocking_read,
939 const size_t * origin[3],
940 const size_t * region[3],
944 cl_uint num_events_in_wait_list,
945 const cl_event * event_wait_list,
947 (command_queue, image, blocking_read, origin, region,
948 row_pitch, slice_pitch,
950 num_events_in_wait_list,
954 OCL_FUNC(cl_int, clEnqueueWriteImage,
955 (cl_command_queue command_queue,
957 cl_bool blocking_write,
958 const size_t * origin[3],
959 const size_t * region[3],
960 size_t input_row_pitch,
961 size_t input_slice_pitch,
963 cl_uint num_events_in_wait_list,
964 const cl_event * event_wait_list,
966 (command_queue, image, blocking_write, origin, region, input_row_pitch,
967 input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
969 OCL_FUNC(cl_int, clEnqueueFillImage,
970 (cl_command_queue command_queue,
972 const void * fill_color,
973 const size_t * origin[3],
974 const size_t * region[3],
975 cl_uint num_events_in_wait_list,
976 const cl_event * event_wait_list,
978 (command_queue, image, fill_color, origin, region,
979 num_events_in_wait_list, event_wait_list, event))
981 OCL_FUNC(cl_int, clEnqueueCopyImage,
982 (cl_command_queue command_queue,
985 const size_t * src_origin[3],
986 const size_t * dst_origin[3],
987 const size_t * region[3],
988 cl_uint num_events_in_wait_list,
989 const cl_event * event_wait_list,
991 (command_queue, src_image, dst_image, src_origin, dst_origin,
992 region, num_events_in_wait_list, event_wait_list, event))
994 OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
995 (cl_command_queue command_queue,
998 const size_t * src_origin[3],
999 const size_t * region[3],
1001 cl_uint num_events_in_wait_list,
1002 const cl_event * event_wait_list,
1004 (command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
1005 num_events_in_wait_list, event_wait_list, event))
1008 OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
1009 (cl_command_queue command_queue,
1013 const size_t dst_origin[3],
1014 const size_t region[3],
1015 cl_uint num_events_in_wait_list,
1016 const cl_event * event_wait_list,
1018 (command_queue, src_buffer, dst_image, src_offset, dst_origin,
1019 region, num_events_in_wait_list, event_wait_list, event))
1021 OCL_FUNC(cl_int, clFlush,
1022 (cl_command_queue command_queue),
1026 OCL_FUNC_P(void*, clEnqueueMapImage,
1027 (cl_command_queue command_queue,
1029 cl_bool blocking_map,
1030 cl_map_flags map_flags,
1031 const size_t * origin[3],
1032 const size_t * region[3],
1033 size_t * image_row_pitch,
1034 size_t * image_slice_pitch,
1035 cl_uint num_events_in_wait_list,
1036 const cl_event * event_wait_list,
1038 cl_int * errcode_ret),
1039 (command_queue, image, blocking_map, map_flags, origin, region,
1040 image_row_pitch, image_slice_pitch, num_events_in_wait_list,
1041 event_wait_list, event, errcode_ret))
1045 OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
1047 OCL_FUNC(cl_int, clGetKernelInfo,
1049 cl_kernel_info param_name,
1050 size_t param_value_size,
1052 size_t * param_value_size_ret),
1053 (kernel, param_name, param_value_size, param_value, param_value_size_ret))
1055 OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
1059 OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
1062 OCL_FUNC_P(cl_program, clCreateProgramWithSource,
1063 (cl_context context,
1065 const char ** strings,
1066 const size_t * lengths,
1067 cl_int * errcode_ret),
1068 (context, count, strings, lengths, errcode_ret))
1070 OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
1071 (cl_context context,
1072 cl_uint num_devices,
1073 const cl_device_id * device_list,
1074 const size_t * lengths,
1075 const unsigned char ** binaries,
1076 cl_int * binary_status,
1077 cl_int * errcode_ret),
1078 (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))
1080 OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))
1082 OCL_FUNC(cl_int, clBuildProgram,
1083 (cl_program program,
1084 cl_uint num_devices,
1085 const cl_device_id * device_list,
1086 const char * options,
1087 void (CL_CALLBACK * pfn_notify)(cl_program, void *),
1089 (program, num_devices, device_list, options, pfn_notify, user_data))
1091 OCL_FUNC(cl_int, clGetProgramInfo,
1092 (cl_program program,
1093 cl_program_info param_name,
1094 size_t param_value_size,
1096 size_t * param_value_size_ret),
1097 (program, param_name, param_value_size, param_value, param_value_size_ret))
1099 OCL_FUNC(cl_int, clGetProgramBuildInfo,
1100 (cl_program program,
1101 cl_device_id device,
1102 cl_program_build_info param_name,
1103 size_t param_value_size,
1105 size_t * param_value_size_ret),
1106 (program, device, param_name, param_value_size, param_value, param_value_size_ret))
1108 OCL_FUNC_P(cl_kernel, clCreateKernel,
1109 (cl_program program,
1110 const char * kernel_name,
1111 cl_int * errcode_ret),
1112 (program, kernel_name, errcode_ret))
1114 OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))
1116 OCL_FUNC(cl_int, clSetKernelArg,
1120 const void * arg_value),
1121 (kernel, arg_index, arg_size, arg_value))
1123 OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
1125 cl_device_id device,
1126 cl_kernel_work_group_info param_name,
1127 size_t param_value_size,
1129 size_t * param_value_size_ret),
1130 (kernel, device, param_name, param_value_size, param_value, param_value_size_ret))
1132 OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
1134 OCL_FUNC(cl_int, clEnqueueReadBuffer,
1135 (cl_command_queue command_queue,
1137 cl_bool blocking_read,
1141 cl_uint num_events_in_wait_list,
1142 const cl_event * event_wait_list,
1144 (command_queue, buffer, blocking_read, offset, size, ptr,
1145 num_events_in_wait_list, event_wait_list, event))
1147 OCL_FUNC(cl_int, clEnqueueReadBufferRect,
1148 (cl_command_queue command_queue,
1150 cl_bool blocking_read,
1151 const size_t * buffer_offset,
1152 const size_t * host_offset,
1153 const size_t * region,
1154 size_t buffer_row_pitch,
1155 size_t buffer_slice_pitch,
1156 size_t host_row_pitch,
1157 size_t host_slice_pitch,
1159 cl_uint num_events_in_wait_list,
1160 const cl_event * event_wait_list,
1162 (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch,
1163 buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
1164 event_wait_list, event))
1166 OCL_FUNC(cl_int, clEnqueueWriteBuffer,
1167 (cl_command_queue command_queue,
1169 cl_bool blocking_write,
1173 cl_uint num_events_in_wait_list,
1174 const cl_event * event_wait_list,
1176 (command_queue, buffer, blocking_write, offset, size, ptr,
1177 num_events_in_wait_list, event_wait_list, event))
1179 OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
1180 (cl_command_queue command_queue,
1182 cl_bool blocking_write,
1183 const size_t * buffer_offset,
1184 const size_t * host_offset,
1185 const size_t * region,
1186 size_t buffer_row_pitch,
1187 size_t buffer_slice_pitch,
1188 size_t host_row_pitch,
1189 size_t host_slice_pitch,
1191 cl_uint num_events_in_wait_list,
1192 const cl_event * event_wait_list,
1194 (command_queue, buffer, blocking_write, buffer_offset, host_offset,
1195 region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch,
1196 host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
1198 /*OCL_FUNC(cl_int, clEnqueueFillBuffer,
1199 (cl_command_queue command_queue,
1201 const void * pattern,
1202 size_t pattern_size,
1205 cl_uint num_events_in_wait_list,
1206 const cl_event * event_wait_list,
1208 (command_queue, buffer, pattern, pattern_size, offset, size,
1209 num_events_in_wait_list, event_wait_list, event))*/
1211 OCL_FUNC(cl_int, clEnqueueCopyBuffer,
1212 (cl_command_queue command_queue,
1218 cl_uint num_events_in_wait_list,
1219 const cl_event * event_wait_list,
1221 (command_queue, src_buffer, dst_buffer, src_offset, dst_offset,
1222 size, num_events_in_wait_list, event_wait_list, event))
1224 OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
1225 (cl_command_queue command_queue,
1228 const size_t * src_origin,
1229 const size_t * dst_origin,
1230 const size_t * region,
1231 size_t src_row_pitch,
1232 size_t src_slice_pitch,
1233 size_t dst_row_pitch,
1234 size_t dst_slice_pitch,
1235 cl_uint num_events_in_wait_list,
1236 const cl_event * event_wait_list,
1238 (command_queue, src_buffer, dst_buffer, src_origin, dst_origin,
1239 region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
1240 num_events_in_wait_list, event_wait_list, event))
1242 OCL_FUNC_P(void*, clEnqueueMapBuffer,
1243 (cl_command_queue command_queue,
1245 cl_bool blocking_map,
1246 cl_map_flags map_flags,
1249 cl_uint num_events_in_wait_list,
1250 const cl_event * event_wait_list,
1252 cl_int * errcode_ret),
1253 (command_queue, buffer, blocking_map, map_flags, offset, size,
1254 num_events_in_wait_list, event_wait_list, event, errcode_ret))
1256 OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
1257 (cl_command_queue command_queue,
1260 cl_uint num_events_in_wait_list,
1261 const cl_event * event_wait_list,
1263 (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))
1265 OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
1266 (cl_command_queue command_queue,
1269 const size_t * global_work_offset,
1270 const size_t * global_work_size,
1271 const size_t * local_work_size,
1272 cl_uint num_events_in_wait_list,
1273 const cl_event * event_wait_list,
1275 (command_queue, kernel, work_dim, global_work_offset, global_work_size,
1276 local_work_size, num_events_in_wait_list, event_wait_list, event))
1278 OCL_FUNC(cl_int, clEnqueueTask,
1279 (cl_command_queue command_queue,
1281 cl_uint num_events_in_wait_list,
1282 const cl_event * event_wait_list,
1284 (command_queue, kernel, num_events_in_wait_list, event_wait_list, event))
1286 OCL_FUNC(cl_int, clSetEventCallback,
1288 cl_int command_exec_callback_type ,
1289 void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data),
1291 (event, command_exec_callback_type, pfn_event_notify, user_data))
1293 OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
1299 #ifndef CL_VERSION_1_2
1300 #define CL_VERSION_1_2
1306 #define CV_OclDbgAssert CV_DbgAssert
1308 #define CV_OclDbgAssert(expr) (void)(expr)
1311 namespace cv { namespace ocl {
1315 UMat2D(const UMat& m)
1317 offset = (int)m.offset;
1330 UMat3D(const UMat& m)
1332 offset = (int)m.offset;
1333 step = (int)m.step.p[1];
1334 slicestep = (int)m.step.p[0];
1335 slices = (int)m.size.p[0];
1347 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
1348 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
1350 static uint64 table[256];
1351 static bool initialized = false;
1355 for( int i = 0; i < 256; i++ )
1358 for( int j = 0; j < 8; j++ )
1359 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
1366 for( size_t idx = 0; idx < size; idx++ )
1367 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
1374 typedef uint64 part;
1375 HashKey(part _a, part _b) : a(_a), b(_b) {}
1379 inline bool operator == (const HashKey& h1, const HashKey& h2)
1381 return h1.a == h2.a && h1.b == h2.b;
1384 inline bool operator < (const HashKey& h1, const HashKey& h2)
1386 return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
1393 static bool g_isOpenCLInitialized = false;
1394 static bool g_isOpenCLAvailable = false;
1396 if (!g_isOpenCLInitialized)
1401 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1405 g_isOpenCLAvailable = false;
1407 g_isOpenCLInitialized = true;
1409 return g_isOpenCLAvailable;
1417 CoreTLSData* data = coreTlsData.get();
1418 if( data->useOpenCL < 0 )
1422 data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() != NULL;
1426 data->useOpenCL = 0;
1429 return data->useOpenCL > 0;
1432 void setUseOpenCL(bool flag)
1436 CoreTLSData* data = coreTlsData.get();
1437 data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0;
1441 #ifdef HAVE_CLAMDBLAS
1446 static AmdBlasHelper & getInstance()
1448 static AmdBlasHelper amdBlas;
1452 bool isAvailable() const
1454 return g_isAmdBlasAvailable;
1461 clAmdBlasTeardown();
1469 if (!g_isAmdBlasInitialized)
1473 if (!g_isAmdBlasInitialized && haveOpenCL())
1477 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
1481 g_isAmdBlasAvailable = false;
1485 g_isAmdBlasAvailable = false;
1487 g_isAmdBlasInitialized = true;
1493 static bool g_isAmdBlasInitialized;
1494 static bool g_isAmdBlasAvailable;
1497 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1498 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1499 Mutex AmdBlasHelper::m;
1503 return AmdBlasHelper::getInstance().isAvailable();
1515 #ifdef HAVE_CLAMDFFT
1520 static AmdFftHelper & getInstance()
1522 static AmdFftHelper amdFft;
1526 bool isAvailable() const
1528 return g_isAmdFftAvailable;
1535 // clAmdFftTeardown();
1543 if (!g_isAmdFftInitialized)
1547 if (!g_isAmdFftInitialized && haveOpenCL())
1551 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1552 g_isAmdFftAvailable = true;
1554 catch (const Exception &)
1556 g_isAmdFftAvailable = false;
1560 g_isAmdFftAvailable = false;
1562 g_isAmdFftInitialized = true;
1567 static clAmdFftSetupData setupData;
1569 static bool g_isAmdFftInitialized;
1570 static bool g_isAmdFftAvailable;
1573 clAmdFftSetupData AmdFftHelper::setupData;
1574 bool AmdFftHelper::g_isAmdFftAvailable = false;
1575 bool AmdFftHelper::g_isAmdFftInitialized = false;
1576 Mutex AmdFftHelper::m;
1580 return AmdFftHelper::getInstance().isAvailable();
1594 Queue::getDefault().finish();
1597 #define IMPLEMENT_REFCOUNTABLE() \
1598 void addref() { CV_XADD(&refcount, 1); } \
1599 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
1602 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1604 struct Platform::Impl
1610 initialized = false;
1619 //cl_uint num_entries
1621 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1627 CV_OclDbgAssert(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len) == CL_SUCCESS);
1629 vendor = String(buf);
1636 IMPLEMENT_REFCOUNTABLE();
1638 cl_platform_id handle;
1643 Platform::Platform()
1648 Platform::~Platform()
1654 Platform::Platform(const Platform& pl)
1661 Platform& Platform::operator = (const Platform& pl)
1663 Impl* newp = (Impl*)pl.p;
1672 void* Platform::ptr() const
1674 return p ? p->handle : 0;
1677 Platform& Platform::getDefault()
1688 /////////////////////////////////////// Device ////////////////////////////////////////////
1690 // deviceVersion has format
1691 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1693 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1694 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1695 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1698 if (10 >= deviceVersion.length())
1700 const char *pstr = deviceVersion.c_str();
1701 if (0 != strncmp(pstr, "OpenCL ", 7))
1703 size_t ppos = deviceVersion.find('.', 7);
1704 if (String::npos == ppos)
1706 String temp = deviceVersion.substr(7, ppos - 7);
1707 major = atoi(temp.c_str());
1708 temp = deviceVersion.substr(ppos + 1);
1709 minor = atoi(temp.c_str());
1716 handle = (cl_device_id)d;
1719 name_ = getStrProp(CL_DEVICE_NAME);
1720 version_ = getStrProp(CL_DEVICE_VERSION);
1721 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1722 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1723 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1724 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1725 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1726 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1728 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1729 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1731 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1732 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1733 vendorName_ == "AMD")
1734 vendorID_ = VENDOR_AMD;
1735 else if (vendorName_ == "Intel(R) Corporation")
1736 vendorID_ = VENDOR_INTEL;
1737 else if (vendorName_ == "NVIDIA Corporation")
1738 vendorID_ = VENDOR_NVIDIA;
1740 vendorID_ = UNKNOWN_VENDOR;
1743 template<typename _TpCL, typename _TpOut>
1744 _TpOut getProp(cl_device_info prop) const
1749 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1750 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1753 bool getBoolProp(cl_device_info prop) const
1755 cl_bool temp = CL_FALSE;
1758 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1759 sz == sizeof(temp) ? temp != 0 : false;
1762 String getStrProp(cl_device_info prop) const
1766 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1767 sz < sizeof(buf) ? String(buf) : String();
1770 IMPLEMENT_REFCOUNTABLE();
1771 cl_device_id handle;
1775 int doubleFPConfig_;
1776 bool hostUnifiedMemory_;
1777 int maxComputeUnits_;
1778 size_t maxWorkGroupSize_;
1780 int deviceVersionMajor_;
1781 int deviceVersionMinor_;
1782 String driverVersion_;
1793 Device::Device(void* d)
1799 Device::Device(const Device& d)
1806 Device& Device::operator = (const Device& d)
1808 Impl* newp = (Impl*)d.p;
1823 void Device::set(void* d)
1830 void* Device::ptr() const
1832 return p ? p->handle : 0;
1835 String Device::name() const
1836 { return p ? p->name_ : String(); }
1838 String Device::extensions() const
1839 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1841 String Device::version() const
1842 { return p ? p->version_ : String(); }
1844 String Device::vendorName() const
1845 { return p ? p->vendorName_ : String(); }
1847 int Device::vendorID() const
1848 { return p ? p->vendorID_ : 0; }
1850 String Device::OpenCL_C_Version() const
1851 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1853 String Device::OpenCLVersion() const
1854 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
1856 int Device::deviceVersionMajor() const
1857 { return p ? p->deviceVersionMajor_ : 0; }
1859 int Device::deviceVersionMinor() const
1860 { return p ? p->deviceVersionMinor_ : 0; }
1862 String Device::driverVersion() const
1863 { return p ? p->driverVersion_ : String(); }
1865 int Device::type() const
1866 { return p ? p->type_ : 0; }
1868 int Device::addressBits() const
1869 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
1871 bool Device::available() const
1872 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1874 bool Device::compilerAvailable() const
1875 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1877 bool Device::linkerAvailable() const
1878 #ifdef CL_VERSION_1_2
1879 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1881 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1884 int Device::doubleFPConfig() const
1885 { return p ? p->doubleFPConfig_ : 0; }
1887 int Device::singleFPConfig() const
1888 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1890 int Device::halfFPConfig() const
1891 #ifdef CL_VERSION_1_2
1892 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1894 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1897 bool Device::endianLittle() const
1898 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1900 bool Device::errorCorrectionSupport() const
1901 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1903 int Device::executionCapabilities() const
1904 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1906 size_t Device::globalMemCacheSize() const
1907 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1909 int Device::globalMemCacheType() const
1910 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1912 int Device::globalMemCacheLineSize() const
1913 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1915 size_t Device::globalMemSize() const
1916 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1918 size_t Device::localMemSize() const
1919 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1921 int Device::localMemType() const
1922 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1924 bool Device::hostUnifiedMemory() const
1925 { return p ? p->hostUnifiedMemory_ : false; }
1927 bool Device::imageSupport() const
1928 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1930 bool Device::imageFromBufferSupport() const
1935 size_t pos = p->getStrProp(CL_DEVICE_EXTENSIONS).find("cl_khr_image2d_from_buffer");
1936 if (pos != String::npos)
1944 uint Device::imagePitchAlignment() const
1946 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1947 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1953 uint Device::imageBaseAddressAlignment() const
1955 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1956 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1962 size_t Device::image2DMaxWidth() const
1963 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1965 size_t Device::image2DMaxHeight() const
1966 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1968 size_t Device::image3DMaxWidth() const
1969 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1971 size_t Device::image3DMaxHeight() const
1972 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1974 size_t Device::image3DMaxDepth() const
1975 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1977 size_t Device::imageMaxBufferSize() const
1978 #ifdef CL_VERSION_1_2
1979 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1981 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1984 size_t Device::imageMaxArraySize() const
1985 #ifdef CL_VERSION_1_2
1986 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1988 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1991 int Device::maxClockFrequency() const
1992 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1994 int Device::maxComputeUnits() const
1995 { return p ? p->maxComputeUnits_ : 0; }
1997 int Device::maxConstantArgs() const
1998 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
2000 size_t Device::maxConstantBufferSize() const
2001 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
2003 size_t Device::maxMemAllocSize() const
2004 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
2006 size_t Device::maxParameterSize() const
2007 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
2009 int Device::maxReadImageArgs() const
2010 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
2012 int Device::maxWriteImageArgs() const
2013 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
2015 int Device::maxSamplers() const
2016 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
2018 size_t Device::maxWorkGroupSize() const
2019 { return p ? p->maxWorkGroupSize_ : 0; }
2021 int Device::maxWorkItemDims() const
2022 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
2024 void Device::maxWorkItemSizes(size_t* sizes) const
2028 const int MAX_DIMS = 32;
2030 CV_OclDbgAssert(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
2031 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz) == CL_SUCCESS);
2035 int Device::memBaseAddrAlign() const
2036 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
2038 int Device::nativeVectorWidthChar() const
2039 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
2041 int Device::nativeVectorWidthShort() const
2042 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
2044 int Device::nativeVectorWidthInt() const
2045 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
2047 int Device::nativeVectorWidthLong() const
2048 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
2050 int Device::nativeVectorWidthFloat() const
2051 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
2053 int Device::nativeVectorWidthDouble() const
2054 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
2056 int Device::nativeVectorWidthHalf() const
2057 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
2059 int Device::preferredVectorWidthChar() const
2060 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
2062 int Device::preferredVectorWidthShort() const
2063 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
2065 int Device::preferredVectorWidthInt() const
2066 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
2068 int Device::preferredVectorWidthLong() const
2069 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
2071 int Device::preferredVectorWidthFloat() const
2072 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
2074 int Device::preferredVectorWidthDouble() const
2075 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
2077 int Device::preferredVectorWidthHalf() const
2078 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
2080 size_t Device::printfBufferSize() const
2081 #ifdef CL_VERSION_1_2
2082 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
2084 { CV_REQUIRE_OPENCL_1_2_ERROR; }
2088 size_t Device::profilingTimerResolution() const
2089 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
2091 const Device& Device::getDefault()
2093 const Context& ctx = Context::getDefault();
2094 int idx = coreTlsData.get()->device;
2095 return ctx.device(idx);
2098 ////////////////////////////////////// Context ///////////////////////////////////////////////////
2100 template <typename Functor, typename ObjectType>
2101 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2104 cl_int err = f(obj, name, 0, NULL, &required);
2105 if (err != CL_SUCCESS)
2111 AutoBuffer<char> buf(required + 1);
2112 char* ptr = (char*)buf; // cleanup is not needed
2113 err = f(obj, name, required, ptr, NULL);
2114 if (err != CL_SUCCESS)
2122 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
2127 std::istringstream ss(s);
2131 std::getline(ss, item, delim);
2132 elems.push_back(item);
2136 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2138 // Sample: AMD:GPU:Tahiti
2139 // Sample: :GPU|CPU: = '' = ':' = '::'
2140 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2141 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2143 std::vector<std::string> parts;
2144 split(configurationStr, ':', parts);
2145 if (parts.size() > 3)
2147 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
2150 if (parts.size() > 2)
2151 deviceNameOrID = parts[2];
2152 if (parts.size() > 1)
2154 split(parts[1], '|', deviceTypes);
2156 if (parts.size() > 0)
2158 platform = parts[0];
2164 static cl_device_id selectOpenCLDevice()
2169 static cl_device_id selectOpenCLDevice()
2171 std::string platform, deviceName;
2172 std::vector<std::string> deviceTypes;
2174 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
2175 if (configuration && !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
2180 if (deviceName.length() == 1)
2181 // We limit ID range to 0..9, because we want to write:
2182 // - '2500' to mean i5-2500
2183 // - '8350' to mean AMD FX-8350
2184 // - '650' to mean GeForce 650
2185 // To extend ID range change condition to '> 0'
2188 for (size_t i = 0; i < deviceName.length(); i++)
2190 if (!isdigit(deviceName[i]))
2198 deviceID = atoi(deviceName.c_str());
2204 std::vector<cl_platform_id> platforms;
2206 cl_uint numPlatforms = 0;
2207 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
2209 if (numPlatforms == 0)
2211 platforms.resize((size_t)numPlatforms);
2212 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
2213 platforms.resize(numPlatforms);
2216 int selectedPlatform = -1;
2217 if (platform.length() > 0)
2219 for (size_t i = 0; i < platforms.size(); i++)
2222 CV_OclDbgAssert(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name) == CL_SUCCESS);
2223 if (name.find(platform) != std::string::npos)
2225 selectedPlatform = (int)i;
2229 if (selectedPlatform == -1)
2231 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
2235 if (deviceTypes.size() == 0)
2239 deviceTypes.push_back("GPU");
2241 deviceTypes.push_back("CPU");
2244 deviceTypes.push_back("ALL");
2246 for (size_t t = 0; t < deviceTypes.size(); t++)
2249 std::string tempStrDeviceType = deviceTypes[t];
2250 std::transform( tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), tolower );
2252 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2253 deviceType = Device::TYPE_GPU;
2254 else if (tempStrDeviceType == "cpu")
2255 deviceType = Device::TYPE_CPU;
2256 else if (tempStrDeviceType == "accelerator")
2257 deviceType = Device::TYPE_ACCELERATOR;
2258 else if (tempStrDeviceType == "all")
2259 deviceType = Device::TYPE_ALL;
2262 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
2266 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2267 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2268 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2272 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2273 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2276 size_t base = devices.size();
2277 devices.resize(base + count);
2278 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2279 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
2282 for (size_t i = (isID ? deviceID : 0);
2283 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2287 CV_OclDbgAssert(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name) == CL_SUCCESS);
2288 cl_bool useGPU = true;
2289 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2291 cl_bool isIGPU = CL_FALSE;
2292 clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL);
2293 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2295 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2297 // TODO check for OpenCL 1.1
2304 std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
2305 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2306 << " Device types: ";
2307 for (size_t t = 0; t < deviceTypes.size(); t++)
2308 std::cerr << deviceTypes[t] << " ";
2310 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
2311 CV_Error(CL_INVALID_DEVICE, "Requested OpenCL device is not found");
2316 struct Context::Impl
2326 CV_Assert(handle == NULL);
2328 cl_device_id d = selectOpenCLDevice();
2333 cl_platform_id pl = NULL;
2334 CV_OclDbgAssert(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL) == CL_SUCCESS);
2336 cl_context_properties prop[] =
2338 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2342 // !!! in the current implementation force the number of devices to 1 !!!
2346 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2348 bool ok = handle != 0 && status == CL_SUCCESS;
2364 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
2365 cl_context_properties prop[] =
2367 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2371 cl_uint i, nd0 = 0, nd = 0;
2372 int dtype = dtype0 & 15;
2373 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, 0, 0, &nd0 ) == CL_SUCCESS);
2375 AutoBuffer<void*> dlistbuf(nd0*2+1);
2376 cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
2377 cl_device_id* dlist_new = dlist + nd0;
2378 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
2381 for(i = 0; i < nd0; i++)
2384 if( !d.available() || !d.compilerAvailable() )
2386 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
2388 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
2390 String name = d.name();
2391 if( nd != 0 && name != name0 )
2394 dlist_new[nd++] = dlist[i];
2400 // !!! in the current implementation force the number of devices to 1 !!!
2403 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
2404 bool ok = handle != 0 && retval == CL_SUCCESS;
2408 for( i = 0; i < nd; i++ )
2409 devices[i].set(dlist_new[i]);
2417 clReleaseContext(handle);
2423 Program getProg(const ProgramSource& src,
2424 const String& buildflags, String& errmsg)
2426 String prefix = Program::getPrefix(buildflags);
2427 HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
2428 phash_t::iterator it = phash.find(k);
2429 if( it != phash.end() )
2431 //String filename = format("%08x%08x_%08x%08x.clb2",
2432 Program prog(src, buildflags, errmsg);
2434 phash.insert(std::pair<HashKey,Program>(k, prog));
2438 IMPLEMENT_REFCOUNTABLE();
2441 std::vector<Device> devices;
2443 typedef ProgramSource::hash_t hash_t;
2447 HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
2448 bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
2449 bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
2450 bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
2453 typedef std::map<HashKey, Program> phash_t;
2463 Context::Context(int dtype)
2469 bool Context::create()
2484 bool Context::create(int dtype0)
2490 p = new Impl(dtype0);
2508 Context::Context(const Context& c)
2515 Context& Context::operator = (const Context& c)
2517 Impl* newp = (Impl*)c.p;
2526 void* Context::ptr() const
2528 return p == NULL ? NULL : p->handle;
2531 size_t Context::ndevices() const
2533 return p ? p->devices.size() : 0;
2536 const Device& Context::device(size_t idx) const
2538 static Device dummy;
2539 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2542 Context& Context::getDefault(bool initialize)
2544 static Context* ctx = new Context();
2545 if(!ctx->p && haveOpenCL())
2548 ctx->p = new Impl();
2551 // do not create new Context right away.
2552 // First, try to retrieve existing context of the same type.
2553 // In its turn, Platform::getContext() may call Context::create()
2554 // if there is no such context.
2555 if (ctx->p->handle == NULL)
2556 ctx->p->setDefault();
2563 Program Context::getProg(const ProgramSource& prog,
2564 const String& buildopts, String& errmsg)
2566 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2569 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
2571 cl_context context = (cl_context)_context;
2572 cl_device_id device = (cl_device_id)_device;
2574 // cleanup old context
2575 Context::Impl * impl = ctx.p;
2578 CV_OclDbgAssert(clReleaseContext(impl->handle) == CL_SUCCESS);
2580 impl->devices.clear();
2582 impl->handle = context;
2583 impl->devices.resize(1);
2584 impl->devices[0].set(device);
2586 Platform& p = Platform::getDefault();
2587 Platform::Impl* pImpl = p.p;
2588 pImpl->handle = (cl_platform_id)platform;
2591 /////////////////////////////////////////// Queue /////////////////////////////////////////////
2595 Impl(const Context& c, const Device& d)
2598 const Context* pc = &c;
2599 cl_context ch = (cl_context)pc->ptr();
2602 pc = &Context::getDefault();
2603 ch = (cl_context)pc->ptr();
2605 cl_device_id dh = (cl_device_id)d.ptr();
2607 dh = (cl_device_id)pc->device(0).ptr();
2609 handle = clCreateCommandQueue(ch, dh, 0, &retval);
2610 CV_OclDbgAssert(retval == CL_SUCCESS);
2616 if (!cv::__termination)
2622 clReleaseCommandQueue(handle);
2628 IMPLEMENT_REFCOUNTABLE();
2630 cl_command_queue handle;
2638 Queue::Queue(const Context& c, const Device& d)
2644 Queue::Queue(const Queue& q)
2651 Queue& Queue::operator = (const Queue& q)
2653 Impl* newp = (Impl*)q.p;
2668 bool Queue::create(const Context& c, const Device& d)
2673 return p->handle != 0;
2676 void Queue::finish()
2680 CV_OclDbgAssert(clFinish(p->handle) == CL_SUCCESS);
2684 void* Queue::ptr() const
2686 return p ? p->handle : 0;
2689 Queue& Queue::getDefault()
2691 Queue& q = coreTlsData.get()->oclQueue;
2692 if( !q.p && haveOpenCL() )
2693 q.create(Context::getDefault());
2697 static cl_command_queue getQueue(const Queue& q)
2699 cl_command_queue qq = (cl_command_queue)q.ptr();
2701 qq = (cl_command_queue)Queue::getDefault().ptr();
2705 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
2707 KernelArg::KernelArg()
2708 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
2712 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
2713 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
2717 KernelArg KernelArg::Constant(const Mat& m)
2719 CV_Assert(m.isContinuous());
2720 return KernelArg(CONSTANT, 0, 0, 0, m.data, m.total()*m.elemSize());
2723 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
2727 Impl(const char* kname, const Program& prog) :
2728 refcount(1), e(0), nu(0)
2730 cl_program ph = (cl_program)prog.ptr();
2733 clCreateKernel(ph, kname, &retval) : 0;
2734 CV_OclDbgAssert(retval == CL_SUCCESS);
2735 for( int i = 0; i < MAX_ARRS; i++ )
2737 haveTempDstUMats = false;
2742 for( int i = 0; i < MAX_ARRS; i++ )
2745 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
2746 u[i]->currAllocator->deallocate(u[i]);
2750 haveTempDstUMats = false;
2753 void addUMat(const UMat& m, bool dst)
2755 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
2757 CV_XADD(&m.u->urefcount, 1);
2759 if(dst && m.u->tempUMat())
2760 haveTempDstUMats = true;
2763 void addImage(const Image2D& image)
2765 images.push_back(image);
2772 if(e) { clReleaseEvent(e); e = 0; }
2779 clReleaseKernel(handle);
2782 IMPLEMENT_REFCOUNTABLE();
2786 enum { MAX_ARRS = 16 };
2787 UMatData* u[MAX_ARRS];
2789 std::list<Image2D> images;
2790 bool haveTempDstUMats;
2797 static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
2799 ((cv::ocl::Kernel::Impl*)p)->finit();
2804 namespace cv { namespace ocl {
2811 Kernel::Kernel(const char* kname, const Program& prog)
2814 create(kname, prog);
2817 Kernel::Kernel(const char* kname, const ProgramSource& src,
2818 const String& buildopts, String* errmsg)
2821 create(kname, src, buildopts, errmsg);
2824 Kernel::Kernel(const Kernel& k)
2831 Kernel& Kernel::operator = (const Kernel& k)
2833 Impl* newp = (Impl*)k.p;
2848 bool Kernel::create(const char* kname, const Program& prog)
2852 p = new Impl(kname, prog);
2861 bool Kernel::create(const char* kname, const ProgramSource& src,
2862 const String& buildopts, String* errmsg)
2870 if( !errmsg ) errmsg = &tempmsg;
2871 const Program& prog = Context::getDefault().getProg(src, buildopts, *errmsg);
2872 return create(kname, prog);
2875 void* Kernel::ptr() const
2877 return p ? p->handle : 0;
2880 bool Kernel::empty() const
2885 int Kernel::set(int i, const void* value, size_t sz)
2887 if (!p || !p->handle)
2894 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
2895 CV_OclDbgAssert(retval == CL_SUCCESS);
2896 if (retval != CL_SUCCESS)
2901 int Kernel::set(int i, const Image2D& image2D)
2903 p->addImage(image2D);
2904 cl_mem h = (cl_mem)image2D.ptr();
2905 return set(i, &h, sizeof(h));
2908 int Kernel::set(int i, const UMat& m)
2910 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
2913 int Kernel::set(int i, const KernelArg& arg)
2915 if( !p || !p->handle )
2923 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
2924 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
2925 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
2926 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
2936 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h) == CL_SUCCESS);
2937 else if( arg.m->dims <= 2 )
2940 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
2941 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS);
2942 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS);
2945 if( !(arg.flags & KernelArg::NO_SIZE) )
2947 int cols = u2d.cols*arg.wscale/arg.iwscale;
2948 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows) == CL_SUCCESS);
2949 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols) == CL_SUCCESS);
2956 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
2957 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS);
2958 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS);
2959 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS);
2961 if( !(arg.flags & KernelArg::NO_SIZE) )
2963 int cols = u3d.cols*arg.wscale/arg.iwscale;
2964 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows) == CL_SUCCESS);
2965 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows) == CL_SUCCESS);
2966 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols) == CL_SUCCESS);
2970 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
2973 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj) == CL_SUCCESS);
2978 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
2979 bool sync, const Queue& q)
2981 if(!p || !p->handle || p->e != 0)
2984 cl_command_queue qq = getQueue(q);
2985 size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1};
2987 CV_Assert(_globalsize != 0);
2988 for (int i = 0; i < dims; i++)
2990 size_t val = _localsize ? _localsize[i] :
2991 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
2992 CV_Assert( val > 0 );
2993 total *= _globalsize[i];
2994 globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
2998 if( p->haveTempDstUMats )
3000 cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
3001 offset, globalsize, _localsize, 0, 0,
3003 if( sync || retval != CL_SUCCESS )
3005 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3011 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3013 return retval == CL_SUCCESS;
3016 bool Kernel::runTask(bool sync, const Queue& q)
3018 if(!p || !p->handle || p->e != 0)
3021 cl_command_queue qq = getQueue(q);
3022 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
3023 if( sync || retval != CL_SUCCESS )
3025 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3031 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
3033 return retval == CL_SUCCESS;
3037 size_t Kernel::workGroupSize() const
3039 if(!p || !p->handle)
3041 size_t val = 0, retsz = 0;
3042 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3043 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
3044 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3047 size_t Kernel::preferedWorkGroupSizeMultiple() const
3049 if(!p || !p->handle)
3051 size_t val = 0, retsz = 0;
3052 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3053 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
3054 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
3057 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3059 if(!p || !p->handle || !wsz)
3062 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3063 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
3064 sizeof(wsz[0]*3), wsz, &retsz) == CL_SUCCESS;
3067 size_t Kernel::localMemSize() const
3069 if(!p || !p->handle)
3073 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3074 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
3075 sizeof(val), &val, &retsz) == CL_SUCCESS ? (size_t)val : 0;
3078 /////////////////////////////////////////// Program /////////////////////////////////////////////
3080 struct Program::Impl
3082 Impl(const ProgramSource& _src,
3083 const String& _buildflags, String& errmsg)
3086 const Context& ctx = Context::getDefault();
3088 buildflags = _buildflags;
3089 const String& srcstr = src.source();
3090 const char* srcptr = srcstr.c_str();
3091 size_t srclen = srcstr.size();
3094 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
3095 if( handle && retval == CL_SUCCESS )
3097 int i, n = (int)ctx.ndevices();
3098 AutoBuffer<void*> deviceListBuf(n+1);
3099 void** deviceList = deviceListBuf;
3100 for( i = 0; i < n; i++ )
3101 deviceList[i] = ctx.device(i).ptr();
3103 Device device = Device::getDefault();
3105 buildflags += " -D AMD_DEVICE";
3106 else if (device.isIntel())
3107 buildflags += " -D INTEL_DEVICE";
3109 retval = clBuildProgram(handle, n,
3110 (const cl_device_id*)deviceList,
3111 buildflags.c_str(), 0, 0);
3112 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
3113 if( retval != CL_SUCCESS )
3117 cl_int buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3118 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
3119 if (buildInfo_retval == CL_SUCCESS && retsz > 1)
3121 AutoBuffer<char> bufbuf(retsz + 16);
3123 buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
3124 CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
3125 if (buildInfo_retval == CL_SUCCESS)
3127 // TODO It is useful to see kernel name & program file name also
3128 errmsg = String(buf);
3129 printf("OpenCL program build log: %s\n%s\n", buildflags.c_str(), errmsg.c_str());
3133 if (retval != CL_SUCCESS && handle)
3135 clReleaseProgram(handle);
3142 Impl(const String& _buf, const String& _buildflags)
3146 buildflags = _buildflags;
3149 String prefix0 = Program::getPrefix(buildflags);
3150 const Context& ctx = Context::getDefault();
3151 const Device& dev = Device::getDefault();
3152 const char* pos0 = _buf.c_str();
3153 const char* pos1 = strchr(pos0, '\n');
3156 const char* pos2 = strchr(pos1+1, '\n');
3159 const char* pos3 = strchr(pos2+1, '\n');
3162 size_t prefixlen = (pos3 - pos0)+1;
3163 String prefix(pos0, prefixlen);
3164 if( prefix != prefix0 )
3166 const uchar* bin = (uchar*)(pos3+1);
3167 void* devid = dev.ptr();
3168 size_t codelen = _buf.length() - prefixlen;
3169 cl_int binstatus = 0, retval = 0;
3170 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
3171 &codelen, &bin, &binstatus, &retval);
3172 CV_OclDbgAssert(retval == CL_SUCCESS);
3179 size_t progsz = 0, retsz = 0;
3180 String prefix = Program::getPrefix(buildflags);
3181 size_t prefixlen = prefix.length();
3182 if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) != CL_SUCCESS)
3184 AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
3185 uchar* buf = bufbuf;
3186 memcpy(buf, prefix.c_str(), prefixlen);
3188 if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) != CL_SUCCESS)
3190 buf[progsz] = (uchar)'\0';
3191 return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
3199 if (!cv::__termination)
3202 clReleaseProgram(handle);
3208 IMPLEMENT_REFCOUNTABLE();
3216 Program::Program() { p = 0; }
3218 Program::Program(const ProgramSource& src,
3219 const String& buildflags, String& errmsg)
3222 create(src, buildflags, errmsg);
3225 Program::Program(const Program& prog)
3232 Program& Program::operator = (const Program& prog)
3234 Impl* newp = (Impl*)prog.p;
3249 bool Program::create(const ProgramSource& src,
3250 const String& buildflags, String& errmsg)
3254 p = new Impl(src, buildflags, errmsg);
3263 const ProgramSource& Program::source() const
3265 static ProgramSource dummy;
3266 return p ? p->src : dummy;
3269 void* Program::ptr() const
3271 return p ? p->handle : 0;
3274 bool Program::read(const String& bin, const String& buildflags)
3278 p = new Impl(bin, buildflags);
3279 return p->handle != 0;
3282 bool Program::write(String& bin) const
3287 return !bin.empty();
3290 String Program::getPrefix() const
3294 return getPrefix(p->buildflags);
3297 String Program::getPrefix(const String& buildflags)
3299 const Context& ctx = Context::getDefault();
3300 const Device& dev = ctx.device(0);
3301 return format("name=%s\ndriver=%s\nbuildflags=%s\n",
3302 dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
3305 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3307 struct ProgramSource::Impl
3309 Impl(const char* _src)
3313 Impl(const String& _src)
3317 void init(const String& _src)
3321 h = crc64((uchar*)src.c_str(), src.size());
3324 IMPLEMENT_REFCOUNTABLE();
3326 ProgramSource::hash_t h;
3330 ProgramSource::ProgramSource()
3335 ProgramSource::ProgramSource(const char* prog)
3340 ProgramSource::ProgramSource(const String& prog)
3345 ProgramSource::~ProgramSource()
3351 ProgramSource::ProgramSource(const ProgramSource& prog)
3358 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
3360 Impl* newp = (Impl*)prog.p;
3369 const String& ProgramSource::source() const
3371 static String dummy;
3372 return p ? p->src : dummy;
3375 ProgramSource::hash_t ProgramSource::hash() const
3377 return p ? p->h : 0;
3380 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
3382 class OpenCLBufferPool
3385 ~OpenCLBufferPool() { }
3387 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;
3388 virtual void release(cl_mem handle, size_t capacity) = 0;
3391 class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
3402 size_t currentReservedSize;
3403 size_t maxReservedSize;
3405 std::list<BufferEntry> reservedEntries_; // LRU order
3408 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
3410 if (reservedEntries_.empty())
3412 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3413 std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
3414 BufferEntry result = {NULL, 0};
3415 size_t minDiff = (size_t)(-1);
3416 for (; i != reservedEntries_.end(); ++i)
3418 BufferEntry& e = *i;
3419 if (e.capacity_ >= size)
3421 size_t diff = e.capacity_ - size;
3422 if (diff < size / 8 && (result_pos == reservedEntries_.end() || diff < minDiff))
3432 if (result_pos != reservedEntries_.end())
3434 //CV_DbgAssert(result == *result_pos);
3435 reservedEntries_.erase(result_pos);
3437 currentReservedSize -= entry.capacity_;
3444 void _checkSizeOfReservedEntries()
3446 while (currentReservedSize > maxReservedSize)
3448 CV_DbgAssert(!reservedEntries_.empty());
3449 const BufferEntry& entry = reservedEntries_.back();
3450 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3451 currentReservedSize -= entry.capacity_;
3452 _releaseBufferEntry(entry);
3453 reservedEntries_.pop_back();
3457 inline size_t _allocationGranularity(size_t size)
3462 else if (size < 64*1024)
3464 else if (size < 1024*1024)
3466 else if (size < 16*1024*1024)
3472 void _allocateBufferEntry(BufferEntry& entry, size_t size)
3474 CV_DbgAssert(entry.clBuffer_ == NULL);
3475 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
3476 Context& ctx = Context::getDefault();
3477 cl_int retval = CL_SUCCESS;
3478 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
3479 CV_Assert(retval == CL_SUCCESS);
3480 CV_Assert(entry.clBuffer_ != NULL);
3481 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
3482 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
3485 void _releaseBufferEntry(const BufferEntry& entry)
3487 CV_Assert(entry.capacity_ != 0);
3488 CV_Assert(entry.clBuffer_ != NULL);
3489 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
3490 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
3491 clReleaseMemObject(entry.clBuffer_);
3494 OpenCLBufferPoolImpl()
3495 : currentReservedSize(0), maxReservedSize(0)
3497 int poolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
3498 maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", poolSize);
3500 virtual ~OpenCLBufferPoolImpl()
3502 freeAllReservedBuffers();
3503 CV_Assert(reservedEntries_.empty());
3506 virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
3508 BufferEntry entry = {NULL, 0};
3509 if (maxReservedSize > 0)
3511 AutoLock locker(mutex_);
3512 if (_findAndRemoveEntryFromReservedList(entry, size))
3514 CV_DbgAssert(size <= entry.capacity_);
3515 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
3516 capacity = entry.capacity_;
3517 return entry.clBuffer_;
3520 _allocateBufferEntry(entry, size);
3521 capacity = entry.capacity_;
3522 return entry.clBuffer_;
3524 virtual void release(cl_mem handle, size_t capacity)
3526 BufferEntry entry = {handle, capacity};
3527 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
3529 _releaseBufferEntry(entry);
3533 AutoLock locker(mutex_);
3534 reservedEntries_.push_front(entry);
3535 currentReservedSize += entry.capacity_;
3536 _checkSizeOfReservedEntries();
3540 virtual size_t getReservedSize() const { return currentReservedSize; }
3541 virtual size_t getMaxReservedSize() const { return maxReservedSize; }
3542 virtual void setMaxReservedSize(size_t size)
3544 AutoLock locker(mutex_);
3545 size_t oldMaxReservedSize = maxReservedSize;
3546 maxReservedSize = size;
3547 if (maxReservedSize < oldMaxReservedSize)
3549 std::list<BufferEntry>::iterator i = reservedEntries_.begin();
3550 for (; i != reservedEntries_.end();)
3552 const BufferEntry& entry = *i;
3553 if (entry.capacity_ > maxReservedSize / 8)
3555 CV_DbgAssert(currentReservedSize >= entry.capacity_);
3556 currentReservedSize -= entry.capacity_;
3557 _releaseBufferEntry(entry);
3558 i = reservedEntries_.erase(i);
3563 _checkSizeOfReservedEntries();
3566 virtual void freeAllReservedBuffers()
3568 AutoLock locker(mutex_);
3569 std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
3570 for (; i != reservedEntries_.end(); ++i)
3572 const BufferEntry& entry = *i;
3573 _releaseBufferEntry(entry);
3575 reservedEntries_.clear();
3579 #if defined _MSC_VER
3580 #pragma warning(disable:4127) // conditional expression is constant
3582 template <bool readAccess, bool writeAccess>
3583 class AlignedDataPtr
3587 uchar* const originPtr_;
3588 const size_t alignment_;
3590 uchar* allocatedPtr_;
3593 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
3594 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
3596 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
3597 if (((size_t)ptr_ & (alignment - 1)) != 0)
3599 allocatedPtr_ = new uchar[size_ + alignment - 1];
3600 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
3603 memcpy(ptr_, originPtr_, size_);
3608 uchar* getAlignedPtr() const
3610 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
3620 memcpy(originPtr_, ptr_, size_);
3622 delete[] allocatedPtr_;
3623 allocatedPtr_ = NULL;
3628 AlignedDataPtr(const AlignedDataPtr&); // disabled
3629 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
3631 #if defined _MSC_VER
3632 #pragma warning(default:4127) // conditional expression is constant
3635 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
3636 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
3639 class OpenCLAllocator : public MatAllocator
3641 mutable OpenCLBufferPoolImpl bufferPool;
3644 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0
3647 OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
3649 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
3650 int flags, UMatUsageFlags usageFlags) const
3652 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
3656 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
3658 const Device& dev = ctx.device(0);
3660 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
3661 createFlags |= CL_MEM_ALLOC_HOST_PTR;
3663 if( dev.hostUnifiedMemory() )
3666 flags0 = UMatData::COPY_ON_MAP;
3669 UMatData* allocate(int dims, const int* sizes, int type,
3670 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
3673 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3674 CV_Assert(data == 0);
3675 size_t total = CV_ELEM_SIZE(type);
3676 for( int i = dims-1; i >= 0; i-- )
3683 Context& ctx = Context::getDefault();
3684 int createFlags = 0, flags0 = 0;
3685 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
3687 size_t capacity = 0;
3688 void* handle = NULL;
3689 int allocatorFlags = 0;
3690 if (createFlags == 0)
3692 handle = bufferPool.allocate(total, capacity);
3694 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3695 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
3701 handle = clCreateBuffer((cl_context)ctx.ptr(),
3702 CL_MEM_READ_WRITE|createFlags, total, 0, &retval);
3703 if( !handle || retval != CL_SUCCESS )
3704 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
3706 UMatData* u = new UMatData(this);
3709 u->capacity = capacity;
3712 u->allocatorFlags_ = allocatorFlags;
3713 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
3717 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
3722 UMatDataAutoLock lock(u);
3726 CV_Assert(u->origdata != 0);
3727 Context& ctx = Context::getDefault();
3728 int createFlags = 0, flags0 = 0;
3729 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
3731 cl_context ctx_handle = (cl_context)ctx.ptr();
3733 int tempUMatFlags = UMatData::TEMP_UMAT;
3734 u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|CL_MEM_READ_WRITE,
3735 u->size, u->origdata, &retval);
3736 if((!u->handle || retval != CL_SUCCESS) && !(accessFlags & ACCESS_FAST))
3738 u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
3739 u->size, u->origdata, &retval);
3740 tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
3742 if(!u->handle || retval != CL_SUCCESS)
3744 u->prevAllocator = u->currAllocator;
3745 u->currAllocator = this;
3746 u->flags |= tempUMatFlags;
3748 if(accessFlags & ACCESS_WRITE)
3749 u->markHostCopyObsolete(true);
3753 /*void sync(UMatData* u) const
3755 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3756 UMatDataAutoLock lock(u);
3758 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
3760 if( u->tempCopiedUMat() )
3762 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3763 u->size, u->origdata, 0, 0, 0);
3768 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3769 (CL_MAP_READ | CL_MAP_WRITE),
3770 0, u->size, 0, 0, 0, &retval);
3771 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
3774 u->markHostCopyObsolete(false);
3776 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
3778 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3779 u->size, u->data, 0, 0, 0);
3783 void deallocate(UMatData* u) const
3788 CV_Assert(u->urefcount >= 0);
3789 CV_Assert(u->refcount >= 0);
3791 // TODO: !!! when we add Shared Virtual Memory Support,
3792 // this function (as well as the others) should be corrected
3793 CV_Assert(u->handle != 0 && u->urefcount == 0);
3796 // UMatDataAutoLock lock(u);
3797 if( u->hostCopyObsolete() && u->refcount > 0 )
3799 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3800 if( u->tempCopiedUMat() )
3802 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3803 CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3804 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
3809 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3810 (CL_MAP_READ | CL_MAP_WRITE),
3811 0, u->size, 0, 0, 0, &retval);
3812 CV_OclDbgAssert(retval == CL_SUCCESS);
3813 CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
3814 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3817 u->markHostCopyObsolete(false);
3818 clReleaseMemObject((cl_mem)u->handle);
3820 u->currAllocator = u->prevAllocator;
3821 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3823 u->data = u->origdata;
3824 if(u->refcount == 0)
3825 u->currAllocator->deallocate(u);
3829 CV_Assert(u->refcount == 0);
3830 if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
3835 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
3837 bufferPool.release((cl_mem)u->handle, u->capacity);
3841 clReleaseMemObject((cl_mem)u->handle);
3849 void map(UMatData* u, int accessFlags) const
3854 CV_Assert( u->handle != 0 );
3856 UMatDataAutoLock autolock(u);
3858 if(accessFlags & ACCESS_WRITE)
3859 u->markDeviceCopyObsolete(true);
3861 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3863 // FIXIT Workaround for UMat synchronization issue
3864 // if( u->refcount == 0 )
3866 if( !u->copyOnMap() )
3868 if (u->data) // FIXIT Workaround for UMat synchronization issue
3870 //CV_Assert(u->hostCopyObsolete() == false);
3873 // because there can be other map requests for the same UMat with different access flags,
3874 // we use the universal (read-write) access mode.
3876 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
3877 (CL_MAP_READ | CL_MAP_WRITE),
3878 0, u->size, 0, 0, 0, &retval);
3879 if(u->data && retval == CL_SUCCESS)
3881 u->markHostCopyObsolete(false);
3885 // if map failed, switch to copy-on-map mode for the particular buffer
3886 u->flags |= UMatData::COPY_ON_MAP;
3891 u->data = (uchar*)fastMalloc(u->size);
3892 u->markHostCopyObsolete(true);
3896 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
3898 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3899 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3900 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
3901 u->markHostCopyObsolete(false);
3905 void unmap(UMatData* u) const
3910 CV_Assert(u->handle != 0);
3912 UMatDataAutoLock autolock(u);
3914 // FIXIT Workaround for UMat synchronization issue
3918 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
3920 if( !u->copyOnMap() && u->data )
3922 CV_Assert( (retval = clEnqueueUnmapMemObject(q,
3923 (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
3924 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
3927 else if( u->copyOnMap() && u->deviceCopyObsolete() )
3929 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
3930 CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
3931 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
3933 u->markDeviceCopyObsolete(false);
3934 u->markHostCopyObsolete(false);
3937 bool checkContinuous(int dims, const size_t sz[],
3938 const size_t srcofs[], const size_t srcstep[],
3939 const size_t dstofs[], const size_t dststep[],
3940 size_t& total, size_t new_sz[],
3941 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
3942 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
3944 bool iscontinuous = true;
3945 srcrawofs = srcofs ? srcofs[dims-1] : 0;
3946 dstrawofs = dstofs ? dstofs[dims-1] : 0;
3948 for( int i = dims-2; i >= 0; i-- )
3950 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
3951 iscontinuous = false;
3954 srcrawofs += srcofs[i]*srcstep[i];
3956 dstrawofs += dstofs[i]*dststep[i];
3961 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
3964 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
3965 // we assume that new_... arrays are initialized by caller
3966 // with 0's, so there is no else branch
3969 new_srcofs[0] = srcofs[1];
3970 new_srcofs[1] = srcofs[0];
3976 new_dstofs[0] = dstofs[1];
3977 new_dstofs[1] = dstofs[0];
3981 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
3982 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
3986 // we could check for dims == 3 here,
3987 // but from user perspective this one is more informative
3988 CV_Assert(dims <= 3);
3989 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
3992 new_srcofs[0] = srcofs[2];
3993 new_srcofs[1] = srcofs[1];
3994 new_srcofs[2] = srcofs[0];
3999 new_dstofs[0] = dstofs[2];
4000 new_dstofs[1] = dstofs[1];
4001 new_dstofs[2] = dstofs[0];
4004 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
4005 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
4008 return iscontinuous;
4011 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
4012 const size_t srcofs[], const size_t srcstep[],
4013 const size_t dststep[]) const
4017 UMatDataAutoLock autolock(u);
4019 if( u->data && !u->hostCopyObsolete() )
4021 Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
4024 CV_Assert( u->handle != 0 );
4026 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4028 size_t total = 0, new_sz[] = {0, 0, 0};
4029 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4030 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4032 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
4034 srcrawofs, new_srcofs, new_srcstep,
4035 dstrawofs, new_dstofs, new_dststep);
4037 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4040 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
4041 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4045 CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4046 new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
4047 new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
4051 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
4052 const size_t dstofs[], const size_t dststep[],
4053 const size_t srcstep[]) const
4058 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4059 CV_Assert(u->refcount == 0 || u->tempUMat());
4061 size_t total = 0, new_sz[] = {0, 0, 0};
4062 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4063 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4065 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
4067 srcrawofs, new_srcofs, new_srcstep,
4068 dstrawofs, new_dstofs, new_dststep);
4070 UMatDataAutoLock autolock(u);
4072 // if there is cached CPU copy of the GPU matrix,
4073 // we could use it as a destination.
4074 // we can do it in 2 cases:
4075 // 1. we overwrite the whole content
4076 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
4077 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
4079 Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
4080 u->markHostCopyObsolete(false);
4081 u->markDeviceCopyObsolete(true);
4085 CV_Assert( u->handle != 0 );
4086 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4088 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
4091 CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
4092 CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) == CL_SUCCESS );
4096 CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
4097 new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
4098 new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) == CL_SUCCESS );
4101 u->markHostCopyObsolete(true);
4102 u->markDeviceCopyObsolete(false);
4105 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
4106 const size_t srcofs[], const size_t srcstep[],
4107 const size_t dstofs[], const size_t dststep[], bool _sync) const
4112 size_t total = 0, new_sz[] = {0, 0, 0};
4113 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
4114 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
4116 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
4118 srcrawofs, new_srcofs, new_srcstep,
4119 dstrawofs, new_dstofs, new_dststep);
4121 UMatDataAutoLock src_autolock(src);
4122 UMatDataAutoLock dst_autolock(dst);
4124 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
4126 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
4129 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
4131 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
4132 dst->markHostCopyObsolete(false);
4133 dst->markDeviceCopyObsolete(true);
4137 // there should be no user-visible CPU copies of the UMat which we are going to copy to
4138 CV_Assert(dst->refcount == 0);
4139 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4143 CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4144 srcrawofs, dstrawofs, total, 0, 0, 0) == CL_SUCCESS );
4149 CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
4150 new_srcofs, new_dstofs, new_sz,
4151 new_srcstep[0], new_srcstep[1],
4152 new_dststep[0], new_dststep[1],
4153 0, 0, 0)) == CL_SUCCESS );
4156 dst->markHostCopyObsolete(true);
4157 dst->markDeviceCopyObsolete(false);
4161 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
4165 BufferPoolController* getBufferPoolController() const { return &bufferPool; }
4167 MatAllocator* matStdAllocator;
4170 MatAllocator* getOpenCLAllocator()
4172 static MatAllocator * allocator = new OpenCLAllocator();
4176 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
4178 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
4180 cl_uint numDevices = 0;
4181 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4182 0, NULL, &numDevices) == CL_SUCCESS);
4184 if (numDevices == 0)
4190 devices.resize((size_t)numDevices);
4191 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
4192 numDevices, &devices[0], &numDevices) == CL_SUCCESS);
4195 struct PlatformInfo::Impl
4200 handle = *(cl_platform_id*)id;
4201 getDevices(devices, handle);
4204 String getStrProp(cl_device_info prop) const
4208 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
4209 sz < sizeof(buf) ? String(buf) : String();
4212 IMPLEMENT_REFCOUNTABLE();
4213 std::vector<cl_device_id> devices;
4214 cl_platform_id handle;
4217 PlatformInfo::PlatformInfo()
4222 PlatformInfo::PlatformInfo(void* platform_id)
4224 p = new Impl(platform_id);
4227 PlatformInfo::~PlatformInfo()
4233 PlatformInfo::PlatformInfo(const PlatformInfo& i)
4240 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
4253 int PlatformInfo::deviceNumber() const
4255 return p ? (int)p->devices.size() : 0;
4258 void PlatformInfo::getDevice(Device& device, int d) const
4260 CV_Assert(p && d < (int)p->devices.size() );
4262 device.set(p->devices[d]);
4265 String PlatformInfo::name() const
4267 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
4270 String PlatformInfo::vendor() const
4272 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
4275 String PlatformInfo::version() const
4277 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
4280 static void getPlatforms(std::vector<cl_platform_id>& platforms)
4282 cl_uint numPlatforms = 0;
4283 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
4285 if (numPlatforms == 0)
4291 platforms.resize((size_t)numPlatforms);
4292 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
4295 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
4297 std::vector<cl_platform_id> platforms;
4298 getPlatforms(platforms);
4300 for (size_t i = 0; i < platforms.size(); i++)
4301 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
4304 const char* typeToStr(int type)
4306 static const char* tab[]=
4308 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4309 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4310 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4311 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4312 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4313 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
4314 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
4315 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4317 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4318 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4321 const char* memopTypeToStr(int type)
4323 static const char* tab[] =
4325 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
4326 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
4327 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
4328 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
4329 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4330 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
4331 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
4332 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
4334 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
4335 return cn > 16 ? "?" : tab[depth*16 + cn-1];
4338 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
4340 if( sdepth == ddepth )
4342 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
4343 if( ddepth >= CV_32F ||
4344 (ddepth == CV_32S && sdepth < CV_32S) ||
4345 (ddepth == CV_16S && sdepth <= CV_8S) ||
4346 (ddepth == CV_16U && sdepth == CV_8U))
4348 sprintf(buf, "convert_%s", typestr);
4350 else if( sdepth >= CV_32F )
4351 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
4353 sprintf(buf, "convert_%s_sat", typestr);
4358 template <typename T>
4359 static std::string kerToStr(const Mat & k)
4361 int width = k.cols - 1, depth = k.depth();
4362 const T * const data = reinterpret_cast<const T *>(k.data);
4364 std::ostringstream stream;
4365 stream.precision(10);
4369 for (int i = 0; i < width; ++i)
4370 stream << "DIG(" << (int)data[i] << ")";
4371 stream << "DIG(" << (int)data[width] << ")";
4373 else if (depth == CV_32F)
4375 stream.setf(std::ios_base::showpoint);
4376 for (int i = 0; i < width; ++i)
4377 stream << "DIG(" << data[i] << "f)";
4378 stream << "DIG(" << data[width] << "f)";
4382 for (int i = 0; i < width; ++i)
4383 stream << "DIG(" << data[i] << ")";
4384 stream << "DIG(" << data[width] << ")";
4387 return stream.str();
4390 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
4392 Mat kernel = _kernel.getMat().reshape(1, 1);
4394 int depth = kernel.depth();
4398 if (ddepth != depth)
4399 kernel.convertTo(kernel, ddepth);
4401 typedef std::string (* func_t)(const Mat &);
4402 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
4403 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
4404 const func_t func = funcs[ddepth];
4405 CV_Assert(func != 0);
4407 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
4410 #define PROCESS_SRC(src) \
4415 CV_Assert(src.isMat() || src.isUMat()); \
4416 int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
4417 Size csize = src.size(); \
4418 cols.push_back(ccn * csize.width); \
4419 if (ctype != type) \
4421 offsets.push_back(src.offset()); \
4422 steps.push_back(src.step()); \
4427 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
4428 InputArray src4, InputArray src5, InputArray src6,
4429 InputArray src7, InputArray src8, InputArray src9)
4431 int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz1 = CV_ELEM_SIZE1(depth);
4432 Size ssize = src1.size();
4433 const ocl::Device & d = ocl::Device::getDefault();
4435 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
4436 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
4437 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
4438 d.preferredVectorWidthDouble(), -1 }, kercn = vectorWidths[depth];
4440 // if the device says don't use vectors
4441 if (vectorWidths[0] == 1)
4444 int vectorWidthsOthers[] = { 16, 16, 8, 8, 1, 1, 1, -1 };
4445 kercn = vectorWidthsOthers[depth];
4448 if (ssize.width * cn < kercn || kercn <= 0)
4451 std::vector<size_t> offsets, steps, cols;
4462 size_t size = offsets.size();
4463 int wsz = kercn * esz1;
4464 std::vector<int> dividers(size, wsz);
4466 for (size_t i = 0; i < size; ++i)
4467 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
4471 for (size_t i = 0; i < size; ++i)
4472 if (dividers[i] != wsz)
4479 // width = *std::min_element(dividers.begin(), dividers.end());
4487 // TODO Make this as a method of OpenCL "BuildOptions" class
4488 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
4490 if (!buildOptions.empty())
4491 buildOptions += " ";
4492 int type = _m.type(), depth = CV_MAT_DEPTH(type);
4493 buildOptions += format(
4494 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
4495 name.c_str(), ocl::typeToStr(type),
4496 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
4497 name.c_str(), (int)CV_MAT_CN(type),
4498 name.c_str(), (int)CV_ELEM_SIZE(type),
4499 name.c_str(), (int)CV_ELEM_SIZE1(type),
4500 name.c_str(), (int)depth
4505 struct Image2D::Impl
4507 Impl(const UMat &src, bool norm, bool alias)
4511 init(src, norm, alias);
4517 clReleaseMemObject(handle);
4520 static cl_image_format getImageFormat(int depth, int cn, bool norm)
4522 cl_image_format format;
4523 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
4524 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
4525 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
4526 CL_SNORM_INT16, -1, -1, -1, -1 };
4527 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
4529 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
4530 int channelOrder = channelOrders[cn];
4531 format.image_channel_data_type = (cl_channel_type)channelType;
4532 format.image_channel_order = (cl_channel_order)channelOrder;
4536 static bool isFormatSupported(cl_image_format format)
4538 cl_context context = (cl_context)Context::getDefault().ptr();
4539 // Figure out how many formats are supported by this context.
4540 cl_uint numFormats = 0;
4541 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4542 CL_MEM_OBJECT_IMAGE2D, numFormats,
4544 AutoBuffer<cl_image_format> formats(numFormats);
4545 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
4546 CL_MEM_OBJECT_IMAGE2D, numFormats,
4548 CV_OclDbgAssert(err == CL_SUCCESS);
4549 for (cl_uint i = 0; i < numFormats; ++i)
4551 if (!memcmp(&formats[i], &format, sizeof(format)))
4559 void init(const UMat &src, bool norm, bool alias)
4561 CV_Assert(ocl::Device::getDefault().imageSupport());
4563 int err, depth = src.depth(), cn = src.channels();
4565 cl_image_format format = getImageFormat(depth, cn, norm);
4567 if (!isFormatSupported(format))
4568 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
4570 cl_context context = (cl_context)Context::getDefault().ptr();
4571 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
4573 #ifdef CL_VERSION_1_2
4574 // this enables backwards portability to
4575 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
4576 const Device & d = ocl::Device::getDefault();
4577 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
4578 CV_Assert(!alias || canCreateAlias(src));
4579 if (1 < major || (1 == major && 2 <= minor))
4582 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
4583 desc.image_width = src.cols;
4584 desc.image_height = src.rows;
4585 desc.image_depth = 0;
4586 desc.image_array_size = 1;
4587 desc.image_row_pitch = alias ? src.step[0] : 0;
4588 desc.image_slice_pitch = 0;
4589 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
4590 desc.num_mip_levels = 0;
4591 desc.num_samples = 0;
4592 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
4597 CV_SUPPRESS_DEPRECATED_START
4598 CV_Assert(!alias); // This is an OpenCL 1.2 extension
4599 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
4600 CV_SUPPRESS_DEPRECATED_END
4602 CV_OclDbgAssert(err == CL_SUCCESS);
4604 size_t origin[] = { 0, 0, 0 };
4605 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
4608 if (!alias && !src.isContinuous())
4610 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
4611 CV_OclDbgAssert(err == CL_SUCCESS);
4613 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
4614 CV_Assert(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
4615 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL) == CL_SUCCESS);
4616 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4620 devData = (cl_mem)src.handle(ACCESS_READ);
4622 CV_Assert(devData != NULL);
4626 CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS);
4627 if (!src.isContinuous())
4629 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
4630 CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS);
4635 IMPLEMENT_REFCOUNTABLE();
4645 Image2D::Image2D(const UMat &src, bool norm, bool alias)
4647 p = new Impl(src, norm, alias);
4650 bool Image2D::canCreateAlias(const UMat &m)
4653 const Device & d = ocl::Device::getDefault();
4654 if (d.imageFromBufferSupport())
4656 // This is the required pitch alignment in pixels
4657 uint pitchAlign = d.imagePitchAlignment();
4658 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
4660 // We don't currently handle the case where the buffer was created
4661 // with CL_MEM_USE_HOST_PTR
4662 if (!m.u->tempUMat())
4671 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
4673 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
4675 return Impl::isFormatSupported(format);
4678 Image2D::Image2D(const Image2D & i)
4685 Image2D & Image2D::operator = (const Image2D & i)
4704 void* Image2D::ptr() const
4706 return p ? p->handle : 0;