2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include "memory_gpu.h"
19 #include "engine_impl.h"
20 #include "ocl_base_event.h"
22 namespace cldnn { namespace gpu {
24 gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
25 : memory_impl(engine, layout, false)
26 , _context(engine->get_context())
28 , _buffer(_context->context(), CL_MEM_READ_WRITE, size())
29 , _mapped_ptr(nullptr)
31 void* ptr = gpu_buffer::lock();
32 memset(ptr, 0, size());
36 gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Buffer& buffer)
37 : memory_impl(engine, new_layout, true)
38 , _context(engine->get_context())
41 , _mapped_ptr(nullptr)
46 void* gpu_buffer::lock() {
47 std::lock_guard<std::mutex> locker(_mutex);
48 if (0 == _lock_count) {
49 _mapped_ptr = _context->queue().enqueueMapBuffer(_buffer, CL_TRUE, CL_MAP_WRITE, 0, size());
55 void gpu_buffer::unlock() {
56 std::lock_guard<std::mutex> locker(_mutex);
58 if (0 == _lock_count) {
59 _context->queue().enqueueUnmapMemObject(_buffer, _mapped_ptr);
60 _mapped_ptr = nullptr;
64 void gpu_buffer::fill(unsigned char pattern, event_impl::ptr ev) {
65 cl::Event ev_ocl = dynamic_cast<base_event*>(ev.get())->get();
66 _context->queue().enqueueFillBuffer<unsigned char>(_buffer, pattern, 0, size(), 0, &ev_ocl);
69 gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
70 : memory_impl(engine, layout, false)
71 , _context(engine->get_context())
73 , _mapped_ptr(nullptr)
75 cl_channel_order order;
76 switch (layout.format)
78 case format::image_2d_weights_c1_b_fyx:
79 _width = layout.size.batch[0];
80 _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1];
83 case format::image_2d_weights_winograd_6x3_s1_fbxyb:
84 _height = layout.size.feature[0];
85 _width = layout.size.spatial[0] * layout.size.batch[0] * layout.size.spatial[1] * 8 / 3;
88 case format::image_2d_weights_winograd_6x3_s1_xfbyb:
89 _height = layout.size.feature[0] * layout.size.spatial[0] * 8 / 3;
90 _width = layout.size.batch[0] * layout.size.spatial[1] ;
93 case format::image_2d_weights_c4_fyx_b:
94 _width = layout.size.batch[0];
95 _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1];
99 throw error("unsupported image type!");
102 cl_channel_type type = layout.data_type == data_types::f16 ? CL_HALF_FLOAT : CL_FLOAT;
103 cl::ImageFormat imageFormat(order, type);
104 _buffer = cl::Image2D(_context->context(), CL_MEM_READ_WRITE, imageFormat, _width, _height, 0);
106 void* ptr = gpu_image2d::lock();
107 for(uint64_t y = 0; y < static_cast<uint64_t>(_height); y++)
108 memset(ptr, 0, static_cast<size_t>(y*_row_pitch));
109 gpu_image2d::unlock();
112 gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Image2D& buffer)
113 : memory_impl(engine, new_layout, true)
114 , _context(engine->get_context())
117 , _mapped_ptr(nullptr)
122 void* gpu_image2d::lock() {
123 std::lock_guard<std::mutex> locker(_mutex);
124 if (0 == _lock_count) {
125 _mapped_ptr = _context->queue().enqueueMapImage(_buffer, CL_TRUE, CL_MAP_WRITE, { 0, 0, 0 }, { _width, _height, 1 }, &_row_pitch, &_slice_pitch);
131 void gpu_image2d::unlock() {
132 std::lock_guard<std::mutex> locker(_mutex);
134 if (0 == _lock_count) {
135 _context->queue().enqueueUnmapMemObject(_buffer, _mapped_ptr);
136 _mapped_ptr = nullptr;
140 void gpu_image2d::fill(unsigned char pattern, event_impl::ptr ev) {
141 cl::Event ev_ocl = dynamic_cast<base_event*>(ev.get())->get();
142 cl_uint4 pattern_uint4 = { pattern, pattern, pattern, pattern };
143 _context->queue().enqueueFillImage(_buffer, pattern_uint4, { 0, 0, 0 }, { _width, _height, 1 }, 0, &ev_ocl);