inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "memory_gpu.h"
#include "engine_impl.h"
#include "ocl_base_event.h"

namespace cldnn { namespace gpu {

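// gpu_buffer wraps a cl::Buffer allocated in the engine's OpenCL context.
// Host access goes through lock()/unlock(), which map and unmap the buffer
// on demand. Freshly allocated buffers are zero-initialized.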
gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
    : memory_impl(engine, layout, false)
    , _context(engine->get_context())
    , _lock_count(0)
    , _buffer(_context->context(), CL_MEM_READ_WRITE, size())
    , _mapped_ptr(nullptr)
{
    // Zero-initialize the new allocation through a temporary host mapping.
    void* ptr = gpu_buffer::lock();
    memset(ptr, 0, size());
    gpu_buffer::unlock();
}

gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Buffer& buffer)
    : memory_impl(engine, new_layout, true)
    , _context(engine->get_context())
    , _lock_count(0)
    , _buffer(buffer)
    , _mapped_ptr(nullptr)
{
}

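// lock() maps the buffer into host memory on the first call and keeps a use
// count, so nested lock()/unlock() pairs share a single mapping; CL_TRUE
// makes the map blocking, so the returned pointer is valid immediately.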
void* gpu_buffer::lock() {
    std::lock_guard<std::mutex> locker(_mutex);
    if (0 == _lock_count) {
        _mapped_ptr = _context->queue().enqueueMapBuffer(_buffer, CL_TRUE, CL_MAP_WRITE, 0, size());
    }
    _lock_count++;
    return _mapped_ptr;
}

void gpu_buffer::unlock() {
    std::lock_guard<std::mutex> locker(_mutex);
    _lock_count--;
    if (0 == _lock_count) {
        _context->queue().enqueueUnmapMemObject(_buffer, _mapped_ptr);
        _mapped_ptr = nullptr;
    }
}

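// fill() enqueues an asynchronous buffer fill; enqueueFillBuffer writes its
// completion event into ev_ocl, the OpenCL event extracted from ev.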
void gpu_buffer::fill(unsigned char pattern, event_impl::ptr ev) {
    cl::Event ev_ocl = dynamic_cast<base_event*>(ev.get())->get();
    _context->queue().enqueueFillBuffer<unsigned char>(_buffer, pattern, 0, size(), 0, &ev_ocl);
}

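// gpu_image2d wraps a cl::Image2D. The layout format determines how the 4D
// tensor is flattened into image width/height and whether a single-channel
// (CL_R) or four-channel (CL_RGBA) image is created.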
gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
    : memory_impl(engine, layout, false)
    , _context(engine->get_context())
    , _lock_count(0)
    , _mapped_ptr(nullptr)
{
    cl_channel_order order;
    switch (layout.format)
    {
    case format::image_2d_weights_c1_b_fyx:
        _width = layout.size.batch[0];
        _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1];
        order = CL_R;
        break;
    case format::image_2d_weights_winograd_6x3_s1_fbxyb:
        _height = layout.size.feature[0];
        _width = layout.size.spatial[0] * layout.size.batch[0] * layout.size.spatial[1] * 8 / 3;
        order = CL_R;
        break;
    case format::image_2d_weights_winograd_6x3_s1_xfbyb:
        _height = layout.size.feature[0] * layout.size.spatial[0] * 8 / 3;
        _width = layout.size.batch[0] * layout.size.spatial[1];
        order = CL_R;
        break;
    case format::image_2d_weights_c4_fyx_b:
        _width = layout.size.batch[0];
        _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1];
        order = CL_RGBA;
        break;
    default:
        throw error("unsupported image type!");
    }

    cl_channel_type type = layout.data_type == data_types::f16 ? CL_HALF_FLOAT : CL_FLOAT;
    cl::ImageFormat imageFormat(order, type);
    _buffer = cl::Image2D(_context->context(), CL_MEM_READ_WRITE, imageFormat, _width, _height, 0);

    // Zero-initialize the image row by row. _row_pitch is reported by
    // enqueueMapImage (via lock()) and may exceed the row payload due to
    // alignment, so each row is cleared at its own pitch-based offset and
    // only the _width * bytes-per-pixel payload is touched.
    void* ptr = gpu_image2d::lock();
    size_t pixel_bytes = (order == CL_RGBA ? 4u : 1u) * (type == CL_HALF_FLOAT ? 2u : 4u);
    for (uint64_t y = 0; y < static_cast<uint64_t>(_height); y++)
        memset(static_cast<char*>(ptr) + y * _row_pitch, 0, static_cast<size_t>(_width) * pixel_bytes);
    gpu_image2d::unlock();
}

gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Image2D& buffer)
    : memory_impl(engine, new_layout, true)
    , _context(engine->get_context())
    , _lock_count(0)
    , _buffer(buffer)
    , _mapped_ptr(nullptr)
{
}

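// As with gpu_buffer, the first lock() maps the whole image (blocking) and
// records the row and slice pitches reported by enqueueMapImage; nested
// locks reuse the same mapping.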
void* gpu_image2d::lock() {
    std::lock_guard<std::mutex> locker(_mutex);
    if (0 == _lock_count) {
        _mapped_ptr = _context->queue().enqueueMapImage(_buffer, CL_TRUE, CL_MAP_WRITE, { 0, 0, 0 }, { _width, _height, 1 }, &_row_pitch, &_slice_pitch);
    }
    _lock_count++;
    return _mapped_ptr;
}

void gpu_image2d::unlock() {
    std::lock_guard<std::mutex> locker(_mutex);
    _lock_count--;
    if (0 == _lock_count) {
        _context->queue().enqueueUnmapMemObject(_buffer, _mapped_ptr);
        _mapped_ptr = nullptr;
    }
}

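// fill() replicates the byte pattern across all four channel slots and
// enqueues an asynchronous image fill; enqueueFillImage writes its
// completion event into ev_ocl, the OpenCL event extracted from ev.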
void gpu_image2d::fill(unsigned char pattern, event_impl::ptr ev) {
    cl::Event ev_ocl = dynamic_cast<base_event*>(ev.get())->get();
    cl_uint4 pattern_uint4 = { pattern, pattern, pattern, pattern };
    _context->queue().enqueueFillImage(_buffer, pattern_uint4, { 0, 0, 0 }, { _width, _height, 1 }, 0, &ev_ocl);
}

}}