inference-engine/thirdparty/clDNN/src/program_helpers.cpp

   1 /*
   2 // Copyright (c) 2018 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18
  19 #include "program_helpers.h"
  20 #include "program_impl.h"
  21 #include "data_inst.h"
  22
  23 namespace cldnn
  24 {
  25     //helper function for merging the weights/biases buffers on cpu side for depthwise separable convolution optimization
  26     void program_helpers::merge_buffers(engine_impl &engine, program_node &node, layout target_layout, size_t begin_offset, size_t end_offset)
  27     {
  28         memory_impl::ptr data_to_allocate = engine.allocate_memory(target_layout);
  29
  30         for (size_t i = begin_offset; i < end_offset; i++)
  31         {
  32             auto& weights = node.get_dependency(i).as<data>();
  33             mem_lock<char> src{ weights.get_attached_memory() };
  34             mem_lock<char> dst{ data_to_allocate };
  35             std::copy(src.begin(), src.end(), dst.begin() + (i - begin_offset)*src.size());
  36         }
  37
  38         for (size_t i = 0; i < end_offset - begin_offset - 1; i++)
  39             node.remove_dependency(begin_offset + 1);
  40
  41         auto& data_node = node.get_dependency(begin_offset).as<data>();
  42         data_node.attach_memory(*data_to_allocate, false);
  43     }
  44
  45     //helper function for getting target layout used in depthwise sep optimization
  46     layout program_helpers::get_weights_layout(typed_program_node<cldnn::data> &data_node, int32_t split)
  47     {
  48         auto mem_layout = data_node.get_output_layout();
  49
  50         return layout(mem_layout.data_type, mem_layout.format, { split * mem_layout.size.batch[0], mem_layout.size.feature[0], mem_layout.size.spatial[0], mem_layout.size.spatial[1] });
  51     }
  52
  53     // pair.first tells whether l1 and l2 are absolutely identical
  54     // pair.second tells whether l1 and l2 can be reinterpreted to each other without need of reordering
  55     // note: layouts can only be considered identical if data size described by both layouts match (so no data are genereted nor dropped)
  56     // note: if layouts describe two buffers with different size, consider them not to be identical even if smaller buffer can be considered to hold subsequence of larger buffer,
  57     //       this behavior is required to force buffer allocation for smaller buffer which, currently, should always be performed
  58     std::pair<bool, bool> program_helpers::are_layouts_identical(layout const& l1, layout const& l2)
  59     {
  60         if (l1 == l2)
  61             return{ true, true };
  62         if (l1.data_type != l2.data_type)
  63             return{ false, false };
  64         if (l1.size != l2.size)
  65             return{ false, false };
  66         if (l1.get_linear_size() != l2.get_linear_size())
  67             return{ false, false };
  68         if ((l1.format == format::bf8_xy16 && l2.format != format::bf8_xy16) ||
  69             (l2.format == format::bf8_xy16 && l1.format != format::bf8_xy16) ||
  70             (l1.format == format::b_fs_yx_fsv4 && l2.format != format::b_fs_yx_fsv4) ||
  71             (l2.format == format::b_fs_yx_fsv4 && l1.format != format::b_fs_yx_fsv4))
  72             return{ false, false };
  73
  74         auto l1_pitch = l1.get_pitches();
  75         auto l2_pitch = l2.get_pitches();
  76
  77         //ignore pitches which will never be used (for dims with size == 1)
  78         for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
  79             if (l1.size.raw[i] == 1)
  80                 l1_pitch.raw[i] = 0;
  81         for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
  82             if (l2.size.raw[i] == 1)
  83                 l2_pitch.raw[i] = 0;
  84
  85         auto l1_offset = l1.get_linear_offset();
  86         auto l2_offset = l2.get_linear_offset();
  87         if (l1_pitch == l2_pitch && l1_offset == l2_offset)
  88             return{ false, true };
  89
  90         return{ false, false };
  91     }
  92 }