2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include "memory_impl.h"
20 #include "engine_impl.h"
21 #include "meta_utils.h"
23 #include "data_inst.h"
24 #include "reorder_inst.h"
25 #include "convolution_inst.h"
26 #include "deconvolution_inst.h"
27 #include "fully_connected_inst.h"
28 #include "detection_output_inst.h"
29 #include "embed_inst.h"
30 #include "lstm_gemm_inst.h"
31 #include "generic_layer.hpp"
33 #include "kernel_selector_common.h"
34 #include "kernel_selector_helper.h"
35 #include <boost/optional.hpp>
//this class is used for both static and dynamic reordering of data within the network.
45 //static reordering is done for cldnn::data (i.e. immutable) primitives via internal network
46 // - its done once before network build by running reorder in separate network and fetching its result.
47 //dynamic reordering is done for cldnn::input_layout (i.e. unknown data during network building)
48 // - its done by inserting extra reorder into target topology.
50 //this class does not choose whether there's a need for static or dynamic optimization.
//it's the programmer's responsibility to choose between 'get_reorder', which creates reorder to best format
//for given primitive (or nullptr if it's already optimal) and the user shall insert it into their own topology.
53 // (note: layout_optimizer has internal caching mechanism, so if there's already reorder added for given (mem,format)
54 // pair during 'get_reorder' call, it will be reused);
55 //or 'add_weights_for_optimization' which, beside creating the reorder, adds both primitives (data and reorder) to its
56 //internal network which allows later to call 'optimize' and get already reordered data to be exchanged in target topology.
57 class layout_optimizer
// Tags identifying the tunable attributes below (used with set_optimization_attribute).
enum class optimization_attributes_type
// Hint flags that bias expected-layout decisions; presumably set non-zero when the
// program contains the corresponding construct (split convolutions, bfyx-only
// layers) -- TODO confirm against the callers of set_optimization_attribute.
struct optimization_attributes
int32_t splitted_convolution = 0;
int32_t bfyx_only_layer = 0;
optimization_attributes _optimization_attributes;
// TODO: Remove once we will get full support for input/output padding in all primitive implementations.
bool _output_size_handling_enabled;
// Cache key: identifies a reorder by the id of the data's source primitive and
// the layout the reorder is expected to produce.
primitive_id data_source;
layout expected_layout;
friend bool operator ==(cache_key const& lhs, cache_key const& rhs)
return lhs.data_source == rhs.data_source && lhs.expected_layout == rhs.expected_layout;
friend bool operator !=(cache_key const& lhs, cache_key const& rhs)
// Strict weak ordering (primarily by source id, then by expected layout) so
// cache_key can serve as a std::map key in the caches below.
friend bool operator <(cache_key const& lhs, cache_key const& rhs)
if (lhs.data_source != rhs.data_source)
return (lhs.data_source < rhs.data_source);
return lhs.expected_layout < rhs.expected_layout;
// Caches of already-created primitives so that repeated requests for the same
// (source, expected layout) pair reuse the existing reorder instead of creating a new one.
std::map<cache_key, std::shared_ptr<reorder>> _cached_reorders;
std::map<cache_key, std::shared_ptr<generic_layer>> _cached_generic_layers;
// Per-primitive-type overloads: compute the preferred layout for data currently in
// 'current_layout' when it is used as 'type' (i.e. weights/bias/input) by 'node'.
// 'output_or_weights_layout' gives additional context (e.g. the user's output layout).
layout get_expected_layout(layout const& current_layout, data_type type, convolution_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, deconvolution_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, fully_connected_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, detection_output_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, embed_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, lstm_gemm_node const& node, layout const& output_or_weights_layout);
// Heuristic predicates consulted when choosing convolution formats (definitions not
// visible here; presumably they test whether the bfyx/byxf-optimized kernels apply).
bool convolution_bfyx_opt(const layout& output_layout, const layout& weights_layout, std::shared_ptr<const convolution> conv);
bool convolution_byxf_opt(const layout& output_layout, const layout& weights_layout, std::shared_ptr<const convolution> conv);
// Graph-walking checks limited to 'depth' levels of users / dependencies of 'node'.
bool users_for_convolution_byxf_opt(program_node const& node, uint32_t depth);
bool deps_depth_in_same_format(program_node const& node, const cldnn::format format, uint32_t depth);
//pair.first is reorder (may be nullptr if reorder is not needed), pair.second tells if returned reorder was cached (no need to add it to 'outputs' etc.)
121 //for pair.first == nullptr, pair.second == true
// Returns {reorder to 'expected_layout' (nullptr when the layout is already as
// expected), was-served-from-cache}; checks _cached_reorders before creating.
std::pair<std::shared_ptr<cldnn::reorder>, bool>
create_reorder_if_needed(const layout& current_layout, const cldnn::primitive_id& memid, layout const& expected_layout);
// Same contract, but builds a generic_layer driven by kernel-selector weights
// reorder parameters instead of a plain reorder; cached in _cached_generic_layers.
std::pair<std::shared_ptr<cldnn::generic_layer>, bool>
create_reorder_from_given_source(const cldnn::primitive_id& memid, layout const& expected_layout, const kernel_selector::weights_reorder_params& reorder_params);
// 'output_size_handling_enabled' presumably initializes _output_size_handling_enabled
// (see the TODO on that member) -- confirm in the definition.
explicit layout_optimizer(bool output_size_handling_enabled = true);
131 //this method creates reorder for data, which is currently in 'data_layout' format, to best format in context of 'user' primitive.
132 //data is used by 'user' in a way described by 'type' (i.e. weights/bias/input).
133 //id shall be primitive_id of data's source (used as reorder's input and for cache checks).
134 //user_layout is optional parameter (required for weights and bias, optional for input) which tells what kind of output 'user'
135 // is supposed to compute - it's used for example to decide if weights shall be converted to fp16.
137 //if 'data_layout' is already optimal, nullptr is returned
138 //currently optimizations are supported only for convolution and fully-connected.
140 //returns a pair<reorder,bool> - where pair.first is a pointer to the reorder primitive and pair.second tells if it's been reused
141 //from cache, pair.second == false means this is a newly created primitive and probably needs to be added to topology etc.
// Enabled (via enable_if/is_any_of_v) only for the supported 'user' node types;
// picks the expected layout for 'node' and delegates to create_reorder_if_needed,
// returning the same {reorder-or-nullptr, was-cached} pair.
auto get_reorder(layout const& data_layout,
primitive_id const& id,
layout const& user_layout)
meta::is_any_of_v<T, convolution_node, fully_connected_node, deconvolution_node, detection_output_node, embed_node, lstm_gemm_node>,
meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>
auto expected_layout = get_expected_layout(data_layout, type, node, user_layout);
return create_reorder_if_needed(data_layout, id, expected_layout);
// Fallback overload for unsupported 'user' primitive types: any instantiation is
// rejected at compile time by the static_assert below (always_false_v<T> is false
// for every T, but only fires when this overload is actually instantiated).
auto get_reorder(layout const& data_layout,
primitive_id const& id,
layout const& user_layout)
!meta::is_any_of_v<T, convolution_node, fully_connected_node, deconvolution_node, detection_output_node, embed_node, lstm_gemm_node>,
meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>
static_assert(meta::always_false_v<T>, "Layout optimization for given primitive type is currently unsupported!");
// Unreachable; present only so the function has a well-formed return statement.
return meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>();
// Builds generic_layer primitives realizing the weights reorder described by
// 'reorder_params' for the data produced by 'input_id'; each pair.second presumably
// tells whether that primitive was reused from _cached_generic_layers -- as for
// get_reorder -- TODO confirm in the definition.
std::vector<std::pair<std::shared_ptr<primitive>, bool>> get_generic_layer(
const kernel_selector::weights_reorder_params& reorder_params,
primitive_id input_id,
const layout& old_layout,
// Sets the given optimization_attributes flag to 'val'.
void set_optimization_attribute(optimization_attributes_type attribute, int32_t val);