inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "memory_gpu.h"
#include "engine_impl.h"
#include "ocl_base_event.h"

namespace cldnn { namespace gpu {

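// gpu_buffer wraps a cl::Buffer allocated in the engine's OpenCL context.
// Host access goes through lock()/unlock(), which map and unmap the buffer
// on demand. Freshly allocated buffers are zero-initialized.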
gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
    : memory_impl(engine, layout, false)
    , _context(engine->get_context())
    , _lock_count(0)
    , _buffer(_context->context(), CL_MEM_READ_WRITE, size())
    , _mapped_ptr(nullptr)
{
    // Zero-initialize the new allocation through a temporary host mapping.
    void* ptr = gpu_buffer::lock();
    memset(ptr, 0, size());
    gpu_buffer::unlock();
}

gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Buffer& buffer)
    : memory_impl(engine, new_layout, true)
    , _context(engine->get_context())
    , _lock_count(0)
    , _buffer(buffer)
    , _mapped_ptr(nullptr)
{
}

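// lock() maps the buffer into host memory on the first call and keeps a use
// count, so nested lock()/unlock() pairs share a single mapping; CL_TRUE
// makes the map blocking, so the returned pointer is valid immediately.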
void* gpu_buffer::lock() {
    std::lock_guard<std::mutex> locker(_mutex);
    if (0 == _lock_count) {
        _mapped_ptr = _context->queue().enqueueMapBuffer(_buffer, CL_TRUE, CL_MAP_WRITE, 0, size());
    }
    _lock_count++;
    return _mapped_ptr;
}

void gpu_buffer::unlock() {
    std::lock_guard<std::mutex> locker(_mutex);
    _lock_count--;
    if (0 == _lock_count) {
        _context->queue().enqueueUnmapMemObject(_buffer, _mapped_ptr);
        _mapped_ptr = nullptr;
    }
}

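// fill() enqueues an asynchronous buffer fill; enqueueFillBuffer writes its
// completion event into ev_ocl, the OpenCL event extracted from ev.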
void gpu_buffer::fill(unsigned char pattern, event_impl::ptr ev) {
    cl::Event ev_ocl = dynamic_cast<base_event*>(ev.get())->get();
    _context->queue().enqueueFillBuffer<unsigned char>(_buffer, pattern, 0, size(), 0, &ev_ocl);
}

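// gpu_image2d wraps a cl::Image2D. The layout format determines how the 4D
// tensor is flattened into image width/height and whether a single-channel
// (CL_R) or four-channel (CL_RGBA) image is created.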
gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
    : memory_impl(engine, layout, false)
    , _context(engine->get_context())
    , _lock_count(0)
    , _mapped_ptr(nullptr)
{
    cl_channel_order order;
    switch (layout.format)
    {
    case format::image_2d_weights_c1_b_fyx:
        _width = layout.size.batch[0];
        _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1];
        order = CL_R;
        break;
    case format::image_2d_weights_winograd_6x3_s1_fbxyb:
        _height = layout.size.feature[0];
        _width = layout.size.spatial[0] * layout.size.batch[0] * layout.size.spatial[1] * 8 / 3;
        order = CL_R;
        break;
    case format::image_2d_weights_winograd_6x3_s1_xfbyb:
        _height = layout.size.feature[0] * layout.size.spatial[0] * 8 / 3;
        _width = layout.size.batch[0] * layout.size.spatial[1];
        order = CL_R;
        break;
    case format::image_2d_weights_c4_fyx_b:
        _width = layout.size.batch[0];
        _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1];
        order = CL_RGBA;
        break;
    default:
        throw error("unsupported image type!");
    }

    cl_channel_type type = layout.data_type == data_types::f16 ? CL_HALF_FLOAT : CL_FLOAT;
    cl::ImageFormat imageFormat(order, type);
    _buffer = cl::Image2D(_context->context(), CL_MEM_READ_WRITE, imageFormat, _width, _height, 0);

    // Zero-initialize the image row by row. _row_pitch is reported by
    // enqueueMapImage (via lock()) and may exceed the row payload due to
    // alignment, so each row is cleared at its own pitch-based offset and
    // only the _width * bytes-per-pixel payload is touched.
    void* ptr = gpu_image2d::lock();
    size_t pixel_bytes = (order == CL_RGBA ? 4u : 1u) * (type == CL_HALF_FLOAT ? 2u : 4u);
    for (uint64_t y = 0; y < static_cast<uint64_t>(_height); y++)
        memset(static_cast<char*>(ptr) + y * _row_pitch, 0, static_cast<size_t>(_width) * pixel_bytes);
    gpu_image2d::unlock();
}

gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Image2D& buffer)
    : memory_impl(engine, new_layout, true)
    , _context(engine->get_context())
    , _lock_count(0)
    , _buffer(buffer)
    , _mapped_ptr(nullptr)
{
}

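// As with gpu_buffer, the first lock() maps the whole image (blocking) and
// records the row and slice pitches reported by enqueueMapImage; nested
// locks reuse the same mapping.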
void* gpu_image2d::lock() {
    std::lock_guard<std::mutex> locker(_mutex);
    if (0 == _lock_count) {
        _mapped_ptr = _context->queue().enqueueMapImage(_buffer, CL_TRUE, CL_MAP_WRITE, { 0, 0, 0 }, { _width, _height, 1 }, &_row_pitch, &_slice_pitch);
    }
    _lock_count++;
    return _mapped_ptr;
}

void gpu_image2d::unlock() {
    std::lock_guard<std::mutex> locker(_mutex);
    _lock_count--;
    if (0 == _lock_count) {
        _context->queue().enqueueUnmapMemObject(_buffer, _mapped_ptr);
        _mapped_ptr = nullptr;
    }
}

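// fill() replicates the byte pattern across all four channel slots and
// enqueues an asynchronous image fill; enqueueFillImage writes its
// completion event into ev_ocl, the OpenCL event extracted from ev.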
void gpu_image2d::fill(unsigned char pattern, event_impl::ptr ev) {
    cl::Event ev_ocl = dynamic_cast<base_event*>(ev.get())->get();
    cl_uint4 pattern_uint4 = { pattern, pattern, pattern, pattern };
    _context->queue().enqueueFillImage(_buffer, pattern_uint4, { 0, 0, 0 }, { _width, _height, 1 }, 0, &ev_ocl);
}

}}