2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include "memory_impl.h"
20 #include "engine_impl.h"
21 #include "meta_utils.h"
23 #include "data_inst.h"
24 #include "reorder_inst.h"
25 #include "convolution_inst.h"
26 #include "deconvolution_inst.h"
27 #include "fully_connected_inst.h"
28 #include "detection_output_inst.h"
29 #include "embed_inst.h"
30 #include "lstm_gemm_inst.h"
31 #include "generic_layer.hpp"
33 #include "kernel_selector_common.h"
34 #include "kernel_selector_helper.h"
35 #include <boost/optional.hpp>
//this class is used for both static and dynamic reordering of data within the network.
45 //static reordering is done for cldnn::data (i.e. immutable) primitives via internal network
46 // - its done once before network build by running reorder in separate network and fetching its result.
47 //dynamic reordering is done for cldnn::input_layout (i.e. unknown data during network building)
48 // - its done by inserting extra reorder into target topology.
50 //this class does not choose whether there's a need for static or dynamic optimization.
//it's the programmer's responsibility to choose between 'get_reorder', which creates reorder to best format
//for given primitive (or nullptr if it's already optimal) and the user shall insert it into their own topology.
53 // (note: layout_optimizer has internal caching mechanism, so if there's already reorder added for given (mem,format)
54 // pair during 'get_reorder' call, it will be reused);
55 //or 'add_weights_for_optimization' which, beside creating the reorder, adds both primitives (data and reorder) to its
56 //internal network which allows later to call 'optimize' and get already reordered data to be exchanged in target topology.
57 class layout_optimizer
// Tags identifying the tunable attributes below (used with set_optimization_attribute).
enum class optimization_attributes_type
// Hint flags that bias expected-layout decisions; presumably set non-zero when the
// program contains the corresponding construct (split convolutions, bfyx-only
// layers) -- TODO confirm against the callers of set_optimization_attribute.
struct optimization_attributes
int32_t splitted_convolution = 0;
int32_t bfyx_only_layer = 0;
optimization_attributes _optimization_attributes;
// TODO: Remove once we will get full support for input/output padding in all primitive implementations.
bool _output_size_handling_enabled;
// Cache key: identifies a reorder by the id of the data's source primitive and
// the layout the reorder is expected to produce.
primitive_id data_source;
layout expected_layout;
friend bool operator ==(cache_key const& lhs, cache_key const& rhs)
return lhs.data_source == rhs.data_source && lhs.expected_layout == rhs.expected_layout;
friend bool operator !=(cache_key const& lhs, cache_key const& rhs)
// Strict weak ordering (primarily by source id, then by expected layout) so
// cache_key can serve as a std::map key in the caches below.
friend bool operator <(cache_key const& lhs, cache_key const& rhs)
if (lhs.data_source != rhs.data_source)
return (lhs.data_source < rhs.data_source);
return lhs.expected_layout < rhs.expected_layout;
// Caches of already-created primitives so that repeated requests for the same
// (source, expected layout) pair reuse the existing reorder instead of creating a new one.
std::map<cache_key, std::shared_ptr<reorder>> _cached_reorders;
std::map<cache_key, std::shared_ptr<generic_layer>> _cached_generic_layers;
// Per-primitive-type overloads: compute the preferred layout for data currently in
// 'current_layout' when it is used as 'type' (i.e. weights/bias/input) by 'node'.
// 'output_or_weights_layout' gives additional context (e.g. the user's output layout).
layout get_expected_layout(layout const& current_layout, data_type type, convolution_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, deconvolution_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, fully_connected_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, detection_output_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, embed_node const& node, layout const& output_or_weights_layout);
layout get_expected_layout(layout const& current_layout, data_type type, lstm_gemm_node const& node, layout const& output_or_weights_layout);
// Heuristic predicates consulted when choosing convolution formats (definitions not
// visible here; presumably they test whether the bfyx/byxf-optimized kernels apply).
bool convolution_bfyx_opt(const layout& output_layout, const layout& weights_layout, std::shared_ptr<const convolution> conv);
bool convolution_byxf_opt(const layout& output_layout, const layout& weights_layout, std::shared_ptr<const convolution> conv);
// Graph-walking checks limited to 'depth' levels of users / dependencies of 'node'.
bool users_for_convolution_byxf_opt(program_node const& node, uint32_t depth);
bool deps_depth_in_same_format(program_node const& node, const cldnn::format format, uint32_t depth);
//pair.first is reorder (may be nullptr if reorder is not needed), pair.second tells if returned reorder was cached (no need to add it to 'outputs' etc.)
121 //for pair.first == nullptr, pair.second == true
// Returns {reorder to 'expected_layout' (nullptr when the layout is already as
// expected), was-served-from-cache}; checks _cached_reorders before creating.
std::pair<std::shared_ptr<cldnn::reorder>, bool>
create_reorder_if_needed(const layout& current_layout, const cldnn::primitive_id& memid, layout const& expected_layout);
// Same contract, but builds a generic_layer driven by kernel-selector weights
// reorder parameters instead of a plain reorder; cached in _cached_generic_layers.
std::pair<std::shared_ptr<cldnn::generic_layer>, bool>
create_reorder_from_given_source(const cldnn::primitive_id& memid, layout const& expected_layout, const kernel_selector::weights_reorder_params& reorder_params);
// 'output_size_handling_enabled' presumably initializes _output_size_handling_enabled
// (see the TODO on that member) -- confirm in the definition.
explicit layout_optimizer(bool output_size_handling_enabled = true);
131 //this method creates reorder for data, which is currently in 'data_layout' format, to best format in context of 'user' primitive.
132 //data is used by 'user' in a way described by 'type' (i.e. weights/bias/input).
133 //id shall be primitive_id of data's source (used as reorder's input and for cache checks).
134 //user_layout is optional parameter (required for weights and bias, optional for input) which tells what kind of output 'user'
135 // is supposed to compute - it's used for example to decide if weights shall be converted to fp16.
137 //if 'data_layout' is already optimal, nullptr is returned
138 //currently optimizations are supported only for convolution and fully-connected.
140 //returns a pair<reorder,bool> - where pair.first is a pointer to the reorder primitive and pair.second tells if it's been reused
141 //from cache, pair.second == false means this is a newly created primitive and probably needs to be added to topology etc.
// Enabled (via enable_if/is_any_of_v) only for the supported 'user' node types;
// picks the expected layout for 'node' and delegates to create_reorder_if_needed,
// returning the same {reorder-or-nullptr, was-cached} pair.
auto get_reorder(layout const& data_layout,
primitive_id const& id,
layout const& user_layout)
meta::is_any_of_v<T, convolution_node, fully_connected_node, deconvolution_node, detection_output_node, embed_node, lstm_gemm_node>,
meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>
auto expected_layout = get_expected_layout(data_layout, type, node, user_layout);
return create_reorder_if_needed(data_layout, id, expected_layout);
// Fallback overload for unsupported 'user' primitive types: any instantiation is
// rejected at compile time by the static_assert below (always_false_v<T> is false
// for every T, but only fires when this overload is actually instantiated).
auto get_reorder(layout const& data_layout,
primitive_id const& id,
layout const& user_layout)
!meta::is_any_of_v<T, convolution_node, fully_connected_node, deconvolution_node, detection_output_node, embed_node, lstm_gemm_node>,
meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>
static_assert(meta::always_false_v<T>, "Layout optimization for given primitive type is currently unsupported!");
// Unreachable; present only so the function has a well-formed return statement.
return meta::deduce_ret_type_t<decltype(&layout_optimizer::create_reorder_if_needed)>();
// Builds generic_layer primitives realizing the weights reorder described by
// 'reorder_params' for the data produced by 'input_id'; each pair.second presumably
// tells whether that primitive was reused from _cached_generic_layers -- as for
// get_reorder -- TODO confirm in the definition.
std::vector<std::pair<std::shared_ptr<primitive>, bool>> get_generic_layer(
const kernel_selector::weights_reorder_params& reorder_params,
primitive_id input_id,
const layout& old_layout,
// Sets the given optimization_attributes flag to 'val'.
void set_optimization_attribute(optimization_attributes_type attribute, int32_t val);