2 Copyright (c) 2005-2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
17 #ifndef __TBB_flow_graph_opencl_node_H
18 #define __TBB_flow_graph_opencl_node_H
20 #define __TBB_flow_graph_opencl_node_H_include_area
21 #include "internal/_warning_suppress_enable_notice.h"
23 #include "tbb/tbb_config.h"
24 #if __TBB_PREVIEW_OPENCL_NODE
26 #include "flow_graph.h"
37 #include <OpenCL/opencl.h>
45 namespace interface11 {
47 template <typename DeviceFilter>
50 namespace opencl_info {
51 class default_opencl_factory;
54 template <typename Factory>
57 inline void enforce_cl_retcode(cl_int err, std::string msg) {
58 if (err != CL_SUCCESS) {
59 std::cerr << msg << "; error code: " << err << std::endl;
65 T event_info(cl_event e, cl_event_info i) {
67 enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
72 T device_info(cl_device_id d, cl_device_info i) {
74 enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
79 inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
81 enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
83 char *buff = (char*)alloca(required);
84 enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
90 T platform_info(cl_platform_id p, cl_platform_info i) {
92 enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
97 inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info i) {
99 enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
101 char *buff = (char*)alloca(required);
102 enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
108 class opencl_device {
110 typedef size_t device_id_type;
111 enum : device_id_type {
112 unknown = device_id_type( -2 ),
113 host = device_id_type( -1 )
116 opencl_device() : my_device_id( unknown ), my_cl_device_id( NULL ), my_cl_command_queue( NULL ) {}
118 opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
120 opencl_device( cl_device_id cl_d_id, device_id_type device_id ) : my_device_id( device_id ), my_cl_device_id( cl_d_id ), my_cl_command_queue( NULL ) {}
122 std::string platform_profile() const {
123 return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
125 std::string platform_version() const {
126 return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
128 std::string platform_name() const {
129 return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
131 std::string platform_vendor() const {
132 return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
134 std::string platform_extensions() const {
135 return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
138 template <typename T>
139 void info( cl_device_info i, T &t ) const {
140 t = device_info<T>( my_cl_device_id, i );
142 std::string version() const {
143 // The version string format: OpenCL<space><major_version.minor_version><space><vendor-specific information>
144 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
146 int major_version() const {
148 std::sscanf( version().c_str(), "OpenCL %d", &major );
151 int minor_version() const {
153 std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
156 bool out_of_order_exec_mode_on_host_present() const {
158 if ( major_version() >= 2 )
159 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
161 #endif /* CL_VERSION_2_0 */
162 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
164 bool out_of_order_exec_mode_on_device_present() const {
166 if ( major_version() >= 2 )
167 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
169 #endif /* CL_VERSION_2_0 */
172 std::array<size_t, 3> max_work_item_sizes() const {
173 return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
175 size_t max_work_group_size() const {
176 return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
178 bool built_in_kernel_available( const std::string& k ) const {
179 const std::string semi = ";";
180 // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd").
181 return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
183 std::string built_in_kernels() const {
184 return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
186 std::string name() const {
187 return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
189 cl_bool available() const {
190 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
192 cl_bool compiler_available() const {
193 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
195 cl_bool linker_available() const {
196 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
198 bool extension_available( const std::string &ext ) const {
199 const std::string space = " ";
200 // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2").
201 return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
203 std::string extensions() const {
204 return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
207 cl_device_type type() const {
208 return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
211 std::string vendor() const {
212 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
215 cl_uint address_bits() const {
216 return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
219 cl_device_id device_id() const {
220 return my_cl_device_id;
223 cl_command_queue command_queue() const {
224 return my_cl_command_queue;
227 void set_command_queue( cl_command_queue cmd_queue ) {
228 my_cl_command_queue = cmd_queue;
231 cl_platform_id platform_id() const {
232 return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
237 device_id_type my_device_id;
238 cl_device_id my_cl_device_id;
239 cl_command_queue my_cl_command_queue;
241 friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; }
243 template <typename DeviceFilter>
244 friend class opencl_factory;
245 template <typename Factory>
246 friend class opencl_memory;
247 template <typename Factory>
248 friend class opencl_program;
251 template <typename T, typename Factory>
252 friend class opencl_buffer;
256 class opencl_device_list {
257 typedef std::vector<opencl_device> container_type;
259 typedef container_type::iterator iterator;
260 typedef container_type::const_iterator const_iterator;
261 typedef container_type::size_type size_type;
263 opencl_device_list() {}
264 opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
266 void add( opencl_device d ) { my_container.push_back( d ); }
267 size_type size() const { return my_container.size(); }
268 bool empty() const { return my_container.empty(); }
269 iterator begin() { return my_container.begin(); }
270 iterator end() { return my_container.end(); }
271 const_iterator begin() const { return my_container.begin(); }
272 const_iterator end() const { return my_container.end(); }
273 const_iterator cbegin() const { return my_container.cbegin(); }
274 const_iterator cend() const { return my_container.cend(); }
277 container_type my_container;
282 // Retrieve all OpenCL devices from machine
283 inline opencl_device_list find_available_devices() {
284 opencl_device_list opencl_devices;
286 cl_uint num_platforms;
287 enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
289 std::vector<cl_platform_id> platforms(num_platforms);
290 enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
293 std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
294 cl_uint num_all_devices = 0;
295 while (platforms_it != platforms.end()) {
296 cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
297 if (err == CL_DEVICE_NOT_FOUND) {
298 platforms_it = platforms.erase(platforms_it);
301 enforce_cl_retcode(err, "clGetDeviceIDs failed");
302 num_all_devices += num_devices;
307 std::vector<cl_device_id> devices(num_all_devices);
308 std::vector<cl_device_id>::iterator devices_it = devices.begin();
309 for (auto p = platforms.begin(); p != platforms.end(); ++p) {
310 enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
311 devices_it += num_devices;
314 for (auto d = devices.begin(); d != devices.end(); ++d) {
315 opencl_devices.add(opencl_device((*d)));
318 return opencl_devices;
321 } // namespace internal
323 // TODO: consider this namespace as public API
324 namespace opencl_info {
326 inline const opencl_device_list& available_devices() {
327 // Static storage for all available OpenCL devices on machine
328 static const opencl_device_list my_devices = internal::find_available_devices();
332 } // namespace opencl_info
335 class callback_base : tbb::internal::no_copy {
337 virtual void call() = 0;
338 virtual ~callback_base() {}
341 template <typename Callback, typename T>
342 class callback : public callback_base {
343 Callback my_callback;
346 callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
348 void call() __TBB_override {
349 my_callback( my_data );
353 template <typename T, typename Factory = opencl_info::default_opencl_factory>
354 class opencl_async_msg : public async_msg<T> {
356 typedef T value_type;
358 opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
359 my_callback_flag_ptr->store<tbb::relaxed>(false);
362 explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
363 my_callback_flag_ptr->store<tbb::relaxed>(false);
366 opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
367 my_callback_flag_ptr->store<tbb::relaxed>(false);
368 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
371 T& data( bool wait = true ) {
372 if ( my_is_event && wait ) {
373 enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
374 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
380 const T& data( bool wait = true ) const {
381 if ( my_is_event && wait ) {
382 enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
383 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
389 opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
390 my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
391 my_callback_flag_ptr(dmsg.my_callback_flag_ptr)
394 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
397 opencl_async_msg( opencl_async_msg &&dmsg ) : async_msg<T>(std::move(dmsg)),
398 my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
399 my_callback_flag_ptr( std::move(dmsg.my_callback_flag_ptr) )
401 dmsg.my_is_event = false;
404 opencl_async_msg& operator=(const opencl_async_msg &dmsg) {
405 async_msg<T>::operator =(dmsg);
407 // Release original event
409 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
411 my_data = dmsg.my_data;
412 my_event = dmsg.my_event;
413 my_is_event = dmsg.my_is_event;
415 // Retain copied event
417 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
419 my_callback_flag_ptr = dmsg.my_callback_flag_ptr;
423 ~opencl_async_msg() {
425 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
428 cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
429 void set_event( cl_event e ) const {
431 cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
432 if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
433 enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
434 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
438 clRetainEvent( my_event );
441 void clear_event() const {
443 enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
444 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
449 template <typename Callback>
450 void register_callback( Callback c ) const {
451 __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
452 enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
455 operator T&() { return data(); }
456 operator const T&() const { return data(); }
459 // Overridden in this derived class to inform that
460 // async calculation chain is over
461 void finalize() const __TBB_override {
462 receive_if_memory_object(*this);
463 if (! my_callback_flag_ptr->fetch_and_store(true)) {
464 opencl_async_msg a(*this);
466 register_callback([a](const T& t) mutable {
478 static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
479 tbb::internal::suppress_unused_warning( event_command_exec_status );
480 __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
481 __TBB_ASSERT( data, NULL );
482 callback_base *c = static_cast<callback_base*>(data);
488 mutable cl_event my_event;
489 mutable bool my_is_event = false;
491 std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
494 template <typename K, typename T, typename Factory>
495 K key_from_message( const opencl_async_msg<T, Factory> &dmsg ) {
496 using tbb::flow::key_from_message;
497 const T &t = dmsg.data( false );
498 __TBB_STATIC_ASSERT( true, "" );
499 return key_from_message<K, T>( t );
502 template <typename Factory>
503 class opencl_memory {
506 opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
507 my_curr_device_id = my_factory->devices().begin()->my_device_id;
511 if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
512 enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
515 cl_mem get_cl_mem() const {
519 void* get_host_ptr() {
520 if ( !my_host_ptr ) {
521 opencl_async_msg<void*, Factory> d = receive( NULL );
523 __TBB_ASSERT( d.data() == my_host_ptr, NULL );
528 Factory *factory() const { return my_factory; }
530 opencl_async_msg<void*, Factory> receive(const cl_event *e) {
531 opencl_async_msg<void*, Factory> d;
533 d = opencl_async_msg<void*, Factory>(my_host_ptr, *e);
535 d = opencl_async_msg<void*, Factory>(my_host_ptr);
538 // Concurrent receives are prohibited so we do not worry about synchronization.
539 if (my_curr_device_id.load<tbb::relaxed>() != opencl_device::host) {
540 map_memory(*my_factory->devices().begin(), d);
541 my_curr_device_id.store<tbb::relaxed>(opencl_device::host);
542 my_host_ptr = d.data(false);
544 // Release the sending event
545 if (my_sending_event_present) {
546 enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
547 my_sending_event_present = false;
552 opencl_async_msg<void*, Factory> send(opencl_device device, const cl_event *e) {
553 opencl_device::device_id_type device_id = device.my_device_id;
554 if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
556 tbb::spin_mutex::scoped_lock lock(my_sending_lock);
557 if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
558 __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
559 opencl_async_msg<void*, Factory> d(my_host_ptr);
560 my_factory->enqueue_unmap_buffer(device, *this, d);
561 my_sending_event = *d.get_event();
562 my_sending_event_present = true;
563 enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
565 my_curr_device_id.store<tbb::release>(device_id);
568 __TBB_ASSERT(my_sending_event_present, NULL);
571 // !e means that buffer has come from the host
572 if (!e && my_sending_event_present) e = &my_sending_event;
574 __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
575 return e ? opencl_async_msg<void*, Factory>(NULL, *e) : opencl_async_msg<void*, Factory>(NULL);
578 virtual void map_memory( opencl_device, opencl_async_msg<void*, Factory> & ) = 0;
581 tbb::atomic<opencl_device::device_id_type> my_curr_device_id;
585 tbb::spin_mutex my_sending_lock;
586 bool my_sending_event_present;
587 cl_event my_sending_event;
590 template <typename Factory>
591 class opencl_buffer_impl : public opencl_memory<Factory> {
594 opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
596 this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
597 enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
600 // The constructor for subbuffers.
601 opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
603 cl_buffer_region region = { index, size };
604 this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err );
605 enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
608 size_t size() const {
612 void map_memory( opencl_device device, opencl_async_msg<void*, Factory> &dmsg ) __TBB_override {
613 this->my_factory->enqueue_map_buffer( device, *this, dmsg );
617 template <typename, typename>
618 friend class opencl_buffer;
628 template <typename T, typename Factory = opencl_info::default_opencl_factory>
629 class opencl_subbuffer;
631 template <typename T, typename Factory = opencl_info::default_opencl_factory>
632 class opencl_buffer {
634 typedef cl_mem native_object_type;
635 typedef opencl_buffer memory_object_type;
636 typedef Factory opencl_factory_type;
638 template<access_type a> using iterator = T*;
640 template <access_type a>
641 iterator<a> access() const {
642 T* ptr = (T*)my_impl->get_host_ptr();
643 __TBB_ASSERT( ptr, NULL );
644 return iterator<a>( ptr );
647 T* data() const { return &access<read_write>()[0]; }
649 template <access_type a = read_write>
650 iterator<a> begin() const { return access<a>(); }
652 template <access_type a = read_write>
653 iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
655 size_t size() const { return my_impl->size()/sizeof(T); }
657 T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
660 opencl_buffer( size_t size );
661 opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
663 cl_mem native_object() const {
664 return my_impl->get_cl_mem();
667 const opencl_buffer& memory_object() const {
671 void send( opencl_device device, opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
672 __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
673 opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
674 const cl_event *e = d.get_event();
675 if ( e ) dependency.set_event( *e );
676 else dependency.clear_event();
678 void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
679 __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
680 opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
681 const cl_event *e = d.get_event();
682 if ( e ) dependency.set_event( *e );
683 else dependency.clear_event();
686 opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
688 // The constructor for subbuffers.
689 opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
691 typedef opencl_buffer_impl<Factory> impl_type;
693 std::shared_ptr<impl_type> my_impl;
695 friend bool operator==(const opencl_buffer<T, Factory> &lhs, const opencl_buffer<T, Factory> &rhs) {
696 return lhs.my_impl == rhs.my_impl;
700 friend class opencl_factory;
701 template <typename, typename>
702 friend class opencl_subbuffer;
705 template <typename T, typename Factory>
706 class opencl_subbuffer : public opencl_buffer<T, Factory> {
707 opencl_buffer<T, Factory> my_owner;
709 opencl_subbuffer() {}
710 opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
711 opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
714 template <typename T, typename Factory>
715 opencl_subbuffer<T, Factory> opencl_buffer<T, Factory>::subbuffer( size_t index, size_t size ) const {
716 return opencl_subbuffer<T, Factory>( *this, index, size );
720 #define is_typedef(type) \
721 template <typename T> \
723 template <typename C> \
724 static std::true_type check( typename C::type* ); \
725 template <typename C> \
726 static std::false_type check( ... ); \
728 static const bool value = decltype(check<T>(0))::value; \
731 is_typedef( native_object_type );
732 is_typedef( memory_object_type );
734 template <typename T>
735 typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
736 return t.native_object();
739 template <typename T>
740 typename std::enable_if<!is_native_object_type<T>::value, T>::type get_native_object( T t ) {
744 // send_if_memory_object checks if the T type has memory_object_type and call the send method for the object.
745 template <typename T, typename Factory>
746 typename std::enable_if<is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device device, opencl_async_msg<T, Factory> &dmsg ) {
747 const T &t = dmsg.data( false );
748 typedef typename T::memory_object_type mem_obj_t;
749 mem_obj_t mem_obj = t.memory_object();
750 opencl_async_msg<mem_obj_t, Factory> d( mem_obj );
751 if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
752 mem_obj.send( device, d );
753 if ( d.get_event() ) dmsg.set_event( *d.get_event() );
756 template <typename T>
757 typename std::enable_if<is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device device, T &t ) {
758 typedef typename T::memory_object_type mem_obj_t;
759 mem_obj_t mem_obj = t.memory_object();
760 opencl_async_msg<mem_obj_t, typename mem_obj_t::opencl_factory_type> dmsg( mem_obj );
761 mem_obj.send( device, dmsg );
764 template <typename T>
765 typename std::enable_if<!is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device, T& ) {};
767 // receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object.
768 template <typename T, typename Factory>
769 typename std::enable_if<is_memory_object_type<T>::value>::type receive_if_memory_object( const opencl_async_msg<T, Factory> &dmsg ) {
770 const T &t = dmsg.data( false );
771 typedef typename T::memory_object_type mem_obj_t;
772 mem_obj_t mem_obj = t.memory_object();
773 opencl_async_msg<mem_obj_t, Factory> d( mem_obj );
774 if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
775 mem_obj.receive( d );
776 if ( d.get_event() ) dmsg.set_event( *d.get_event() );
779 template <typename T>
780 typename std::enable_if<!is_memory_object_type<T>::value>::type receive_if_memory_object( const T& ) {}
784 typedef size_t range_index_type;
785 typedef std::array<range_index_type, 3> nd_range_type;
787 template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
788 typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
789 opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
790 auto g_it = global_work.begin();
791 auto l_it = local_work.begin();
792 my_global_work_size = { size_t(-1), size_t(-1), size_t(-1) };
793 // my_local_work_size is still uninitialized
794 for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
795 __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
796 my_global_work_size[s] = *g_it;
797 my_local_work_size[s] = *l_it;
801 const nd_range_type& global_range() const { return my_global_work_size; }
802 const nd_range_type& local_range() const { return my_local_work_size; }
805 nd_range_type my_global_work_size;
806 nd_range_type my_local_work_size;
809 template <typename DeviceFilter>
810 class opencl_factory {
812 template<typename T> using async_msg_type = opencl_async_msg<T, opencl_factory<DeviceFilter>>;
813 typedef opencl_device device_type;
815 class kernel : tbb::internal::no_assign {
817 kernel( const kernel& k ) : my_factory( k.my_factory ) {
818 // Clone my_cl_kernel via opencl_program
821 std::vector<char> kernel_name;
822 for ( size_t curr_size = 32;; curr_size <<= 1 ) {
823 kernel_name.resize( curr_size <<= 1 );
824 enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
825 if ( ret_size < curr_size ) break;
829 enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
830 __TBB_ASSERT( ret_size == sizeof(program), NULL );
832 my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
836 enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
840 typedef opencl_factory<DeviceFilter> factory_type;
842 kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
845 cl_kernel my_cl_kernel;
846 factory_type& my_factory;
848 template <typename DeviceFilter_>
849 friend class opencl_factory;
851 template <typename Factory>
852 friend class opencl_program;
855 typedef kernel kernel_type;
857 // 'range_type' enables kernel_executor with range support
858 // it affects expectations for enqueue_kernel(.....) interface method
859 typedef opencl_range range_type;
863 if ( my_devices.size() ) {
864 for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
865 enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
867 enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
871 bool init( const opencl_device_list &device_list ) {
872 tbb::spin_mutex::scoped_lock lock( my_devices_mutex );
873 if ( !my_devices.size() ) {
874 my_devices = device_list;
882 template <typename Factory>
883 void enqueue_map_buffer( opencl_device device, opencl_buffer_impl<Factory> &buffer, opencl_async_msg<void*, Factory>& dmsg ) {
884 cl_event const* e1 = dmsg.get_event();
887 void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
888 e1 == NULL ? 0 : 1, e1, &e2, &err );
889 enforce_cl_retcode( err, "Failed to map a buffer" );
890 dmsg.data( false ) = ptr;
891 dmsg.set_event( e2 );
892 enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
896 template <typename Factory>
897 void enqueue_unmap_buffer( opencl_device device, opencl_memory<Factory> &memory, opencl_async_msg<void*, Factory>& dmsg ) {
898 cl_event const* e1 = dmsg.get_event();
901 clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
902 "Failed to unmap a buffer" );
903 dmsg.set_event( e2 );
904 enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
907 // --------- Kernel argument & event list helpers --------- //
908 template <size_t NUM_ARGS, typename T>
909 void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
910 auto p = get_native_object(t);
911 enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
914 template <size_t NUM_ARGS, typename T, typename F>
915 void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
916 __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
918 const cl_event * const e = msg.get_event();
920 events[num_events++] = *e;
923 process_one_arg( kernel, events, num_events, place, msg.data(false) );
926 template <size_t NUM_ARGS, typename T, typename ...Rest>
927 void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
928 process_one_arg( kernel, events, num_events, place, t );
929 process_arg_list( kernel, events, num_events, place, args... );
932 template <size_t NUM_ARGS>
933 void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
934 // ------------------------------------------- //
935 template <typename T>
936 void update_one_arg( cl_event, T& ) {}
938 template <typename T, typename F>
939 void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
943 template <typename T, typename ...Rest>
944 void update_arg_list( cl_event e, T& t, Rest&... args ) {
945 update_one_arg( e, t );
946 update_arg_list( e, args... );
949 void update_arg_list( cl_event ) {}
950 // ------------------------------------------- //
952 template <typename ...Args>
953 void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
954 std::array<cl_event, sizeof...(Args)> events;
957 process_arg_list( kernel, events, num_events, place, args... );
959 const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
961 update_arg_list(e, args...);
963 // Release our own reference to cl_event
964 enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
967 // ------------------------------------------- //
968 template <typename T, typename ...Rest>
969 void send_data(opencl_device device, T& t, Rest&... args) {
970 send_if_memory_object( device, t );
971 send_data( device, args... );
974 void send_data(opencl_device) {}
975 // ------------------------------------------- //
978 cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
979 const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
980 const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
981 const typename range_type::nd_range_type& g_size = work_size.global_range();
982 const typename range_type::nd_range_type& l_size = work_size.local_range();
984 for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
987 clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
988 g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
989 "Failed to enqueue a kernel" );
993 // ------------------------------------------- //
994 template <typename T>
995 bool get_event_from_one_arg( cl_event&, const T& ) {
999 template <typename T, typename F>
1000 bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
1001 cl_event const *e_ptr = msg.get_event();
1003 if ( e_ptr != NULL ) {
1011 template <typename T, typename ...Rest>
1012 bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1013 if ( get_event_from_one_arg( e, t ) ) {
1017 return get_event_from_args( e, args... );
1020 bool get_event_from_args( cl_event& ) {
1023 // ------------------------------------------- //
1025 struct finalize_fn : tbb::internal::no_assign {
1026 virtual ~finalize_fn() {}
1027 virtual void operator() () {}
1030 template<typename Fn>
1031 struct finalize_fn_leaf : public finalize_fn {
1033 finalize_fn_leaf(Fn fn) : my_fn(fn) {}
1034 void operator() () __TBB_override { my_fn(); }
1037 static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1038 tbb::internal::suppress_unused_warning(event_command_exec_status);
1039 __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1041 finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1042 __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1045 // Function pointer was created by 'new' & this callback must be called once only
1049 template <typename FinalizeFn, typename ...Args>
1050 void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1053 if ( get_event_from_args( e, args... ) ) {
1054 enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1055 new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1058 enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1061 const opencl_device_list& devices() {
1062 std::call_once( my_once_flag, &opencl_factory::init_once, this );
1067 bool is_same_context( opencl_device::device_id_type d1, opencl_device::device_id_type d2 ) {
1068 __TBB_ASSERT( d1 != opencl_device::unknown && d2 != opencl_device::unknown, NULL );
1069 // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1070 if ( d1 != opencl_device::host && d2 != opencl_device::host )
1075 opencl_factory( const opencl_factory& );
1076 opencl_factory& operator=(const opencl_factory&);
1078 cl_context context() {
1079 std::call_once( my_once_flag, &opencl_factory::init_once, this );
1080 return my_cl_context;
1085 tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1086 if (!my_devices.size())
1087 my_devices = DeviceFilter()( opencl_info::available_devices() );
1090 enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1091 cl_platform_id platform_id = my_devices.begin()->platform_id();
1092 for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1093 enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1095 std::vector<cl_device_id> cl_device_ids;
1096 for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1097 cl_device_ids.push_back((*d).my_cl_device_id);
1100 cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1102 cl_context ctx = clCreateContext(context_properties,
1103 (cl_uint)cl_device_ids.size(),
1104 cl_device_ids.data(),
1106 enforce_cl_retcode(err, "Failed to create context");
1107 my_cl_context = ctx;
1109 size_t device_counter = 0;
1110 for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1111 (*d).my_device_id = device_counter++;
1113 cl_command_queue cq;
1115 if ((*d).major_version() >= 2) {
1116 if ((*d).out_of_order_exec_mode_on_host_present()) {
1117 cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1118 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1120 cl_queue_properties props[] = { 0 };
1121 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1126 cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1127 // Suppress "declared deprecated" warning for the next line.
1128 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1129 #pragma GCC diagnostic push
1130 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1132 #if _MSC_VER || __INTEL_COMPILER
1133 #pragma warning( push )
1134 #if __INTEL_COMPILER
1135 #pragma warning (disable: 1478)
1137 #pragma warning (disable: 4996)
1140 cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1141 #if _MSC_VER || __INTEL_COMPILER
1142 #pragma warning( pop )
1144 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1145 #pragma GCC diagnostic pop
1148 enforce_cl_retcode(err2, "Failed to create command queue");
1149 (*d).my_cl_command_queue = cq;
1153 std::once_flag my_once_flag;
1154 opencl_device_list my_devices;
1155 cl_context my_cl_context;
1157 tbb::spin_mutex my_devices_mutex;
1159 template <typename Factory>
1160 friend class opencl_program;
1161 template <typename Factory>
1162 friend class opencl_buffer_impl;
1163 template <typename Factory>
1164 friend class opencl_memory;
1165 }; // class opencl_factory
1167 // TODO: consider this namespace as public API
1168 namespace opencl_info {
1172 template <typename Factory>
1173 struct default_device_selector {
1174 opencl_device operator()(Factory& f) {
1175 __TBB_ASSERT(!f.devices().empty(), "No available devices");
1176 return *(f.devices().begin());
1180 struct default_device_filter {
1181 opencl_device_list operator()(const opencl_device_list &devices) {
1182 opencl_device_list dl;
1183 cl_platform_id platform_id = devices.begin()->platform_id();
1184 for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1185 if (it->platform_id() == platform_id) {
1193 class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1195 template<typename T> using async_msg_type = opencl_async_msg<T, default_opencl_factory>;
1197 friend default_opencl_factory& default_factory();
1200 default_opencl_factory() = default;
1203 inline default_opencl_factory& default_factory() {
1204 static default_opencl_factory default_factory;
1205 return default_factory;
1208 } // namespace opencl_info
1210 template <typename T, typename Factory>
1211 opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1214 enum class opencl_program_type {
1220 template <typename Factory = opencl_info::default_opencl_factory>
1221 class opencl_program : tbb::internal::no_assign {
1223 typedef typename Factory::kernel_type kernel_type;
1225 opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1226 opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1227 opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1229 opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1230 opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1231 opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1232 opencl_program( opencl_program_type type ) : opencl_program( opencl_info::default_factory(), type ) {}
1234 opencl_program( const opencl_program &src ) : my_factory( src.my_factory ), my_type( src.type ), my_arg_str( src.my_arg_str ), my_cl_program( src.my_cl_program ) {
1235 // Set my_do_once_flag to the called state.
1236 std::call_once( my_do_once_flag, [](){} );
1239 kernel_type get_kernel( const std::string& k ) const {
1240 return kernel_type( get_cl_kernel(k), my_factory );
1244 opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1245 // Set my_do_once_flag to the called state.
1246 std::call_once( my_do_once_flag, [](){} );
1249 cl_kernel get_cl_kernel( const std::string& k ) const {
1250 std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1252 cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1253 enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1259 file_reader( const std::string& filepath ) {
1260 std::ifstream file_descriptor( filepath, std::ifstream::binary );
1261 if ( !file_descriptor.is_open() ) {
1262 std::string str = std::string( "Could not open file: " ) + filepath;
1263 std::cerr << str << std::endl;
1266 file_descriptor.seekg( 0, file_descriptor.end );
1267 size_t length = size_t( file_descriptor.tellg() );
1268 file_descriptor.seekg( 0, file_descriptor.beg );
1269 my_content.resize( length );
1270 char* begin = &*my_content.begin();
1271 file_descriptor.read( begin, length );
1272 file_descriptor.close();
1274 const char* content() { return &*my_content.cbegin(); }
1275 size_t length() { return my_content.length(); }
1277 std::string my_content;
1280 class opencl_program_builder {
1282 typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1283 opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1284 cl_uint num_devices, cl_device_id* device_list,
1285 const char* options, cl_callback_type callback,
1287 cl_int err = clBuildProgram( program, num_devices, device_list, options,
1288 callback, user_data );
1289 if( err == CL_SUCCESS )
1291 std::string str = std::string( "Failed to build program: " ) + name;
1292 if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1293 const opencl_device_list &devices = f.devices();
1294 for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1295 std::cerr << "Build log for device: " << (*d).name() << std::endl;
1297 cl_int query_err = clGetProgramBuildInfo(
1298 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1300 enforce_cl_retcode( query_err, "Failed to get build log size" );
1302 std::vector<char> output;
1303 output.resize( log_size );
1304 query_err = clGetProgramBuildInfo(
1305 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1306 output.size(), output.data(), NULL );
1307 enforce_cl_retcode( query_err, "Failed to get build output" );
1308 std::cerr << output.data() << std::endl;
1310 std::cerr << "No build log available" << std::endl;
1314 enforce_cl_retcode( err, str );
1318 class opencl_device_filter {
1320 template<typename Filter>
1321 opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1322 Filter filter, const char* message ) {
1323 for ( cl_uint i = 0; i < num_devices; ++i )
1324 if ( filter(device_list[i]) ) {
1325 device_list[i--] = device_list[--num_devices];
1328 enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1332 void init( const std::string& ) const {
1333 cl_uint num_devices;
1334 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1335 "Failed to get OpenCL context info" );
1337 enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1338 cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1339 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1340 "Failed to get OpenCL context info" );
1341 const char *options = NULL;
1342 switch ( my_type ) {
1343 case opencl_program_type::SOURCE: {
1344 file_reader fr( my_arg_str );
1345 const char *s[] = { fr.content() };
1346 const size_t l[] = { fr.length() };
1348 my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1349 enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1350 opencl_device_filter(
1351 num_devices, device_list,
1352 []( const opencl_device& d ) -> bool {
1353 return !d.compiler_available() || !d.linker_available();
1354 }, "No one device supports building program from sources" );
1355 opencl_program_builder(
1356 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1357 options, /*callback*/ NULL, /*user data*/NULL );
1360 case opencl_program_type::SPIR:
1361 options = "-x spir";
1362 case opencl_program_type::PRECOMPILED: {
1363 file_reader fr( my_arg_str );
1364 std::vector<const unsigned char*> s(
1365 num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1366 std::vector<size_t> l( num_devices, fr.length() );
1367 std::vector<cl_int> bin_statuses( num_devices, -1 );
1369 my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1370 device_list, l.data(), s.data(),
1371 bin_statuses.data(), &err );
1372 if( err != CL_SUCCESS ) {
1373 std::string statuses_str;
1374 for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1375 statuses_str += std::to_string((*st));
1378 enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1379 std::string( ", binary_statuses = " ) + statuses_str );
1381 opencl_program_builder(
1382 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1383 options, /*callback*/ NULL, /*user data*/NULL );
1387 __TBB_ASSERT( false, "Unsupported program type" );
1391 Factory& my_factory;
1392 opencl_program_type my_type;
1393 std::string my_arg_str;
1394 mutable cl_program my_cl_program;
1395 mutable std::once_flag my_do_once_flag;
1397 template <typename DeviceFilter>
1398 friend class opencl_factory;
1400 template <typename DeviceFilter>
1401 friend class opencl_factory<DeviceFilter>::kernel;
1404 template<typename... Args>
1407 template<typename JP, typename Factory, typename... Ports>
1408 class opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1409 typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1411 typedef typename base_type::kernel_type kernel_type;
1413 opencl_node( graph &g, const kernel_type& kernel )
1414 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1416 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1419 opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1420 : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1422 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1425 template <typename DeviceSelector>
1426 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1427 : base_type( g, kernel, d, f)
1429 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1433 template<typename JP, typename... Ports>
1434 class opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1435 typedef opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > base_type;
1437 typedef typename base_type::kernel_type kernel_type;
1439 opencl_node( graph &g, const kernel_type& kernel )
1440 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1443 template <typename DeviceSelector>
1444 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1445 : base_type( g, kernel, d, opencl_info::default_factory() )
1449 template<typename... Ports>
1450 class opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1451 typedef opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > base_type;
1453 typedef typename base_type::kernel_type kernel_type;
1455 opencl_node( graph &g, const kernel_type& kernel )
1456 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1459 template <typename DeviceSelector>
1460 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1461 : base_type( g, kernel, d, opencl_info::default_factory() )
1465 } // namespace interfaceX
1467 using interface11::opencl_node;
1468 using interface11::read_only;
1469 using interface11::read_write;
1470 using interface11::write_only;
1471 using interface11::opencl_buffer;
1472 using interface11::opencl_subbuffer;
1473 using interface11::opencl_device;
1474 using interface11::opencl_device_list;
1475 using interface11::opencl_program;
1476 using interface11::opencl_program_type;
1477 using interface11::opencl_async_msg;
1478 using interface11::opencl_factory;
1479 using interface11::opencl_range;
1483 #endif /* __TBB_PREVIEW_OPENCL_NODE */
1485 #include "internal/_warning_suppress_disable_notice.h"
1486 #undef __TBB_flow_graph_opencl_node_H_include_area
1488 #endif // __TBB_flow_graph_opencl_node_H