// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include "reorder_inst.h"
19 #include "primitive_type_base.h"
20 #include "error_handler.h"
21 #include "json_object.h"
28 primitive_type_id reorder_type_id()
30 static primitive_type_base<reorder> instance;
34 layout reorder_inst::calc_output_layout(reorder_node const& node)
36 auto input_layout = node.input().get_output_layout();
37 auto ifmt = input_layout.format;
39 auto odt = *node.get_primitive()->output_data_type;
40 auto ofmt = node.get_primitive()->output_format;
41 auto op = node.get_primitive()->output_padding;
43 if (ofmt.is_winograd() && ifmt.is_winograd())
46 return layout(odt, ofmt, input_layout.size, op);
48 CLDNN_ERROR_MESSAGE(node.id(), "Reordering between winograd weights and data formats is unsupported");
51 //transformation of data from standard to winograd
52 if (ofmt == format::winograd_2x3_s1_data)
54 //some constants which are defined by F(2,3) with stride 1 -- todo: think about generic way to calculate them for any F(r,m) with stride s
55 // NOTE: FOR THE FOLLOWING CONSTANTS 'OUTPUT' MEANS OUTPUT OF WINOGRAD CONV (in standard domain) AND 'INPUT' MEANS INPUT FOR WINOGRAD CONV (in winograd domain),
56 // THEREFORE 'INPUT' ACTUALLY REFERS TO THE OUTPUT OF THIS CONVERSION (which is later fed as input for winograd conv)
57 constexpr tensor::value_type output_tile_width = 2; //by definition of F(2,3)
58 constexpr tensor::value_type filter_width = 3; //by definition of F(2,3)
59 constexpr tensor::value_type filter_stride = 1; //by definition of format::winograd_2x3_s1_data (our assumption)
61 constexpr tensor::value_type input_tile_width = filter_width + (output_tile_width - 1) * filter_stride; //input tile should be large enought to hold data for computations of output tile (for given filter size and stride)
63 auto input_offset = node.get_input_offset();
65 //how many tiles do we need to produce
66 // each input tile produces one output tile so we can find no. of input tiles by calculating no. of output tiles (which is equal to width of an output divided by output tile width)
67 tensor::value_type conv_output_width = input_layout.size.spatial[0] - input_offset.spatial[0] - filter_width + 1;
68 tensor::value_type input_tiles_count_x = conv_output_width / output_tile_width;
69 tensor::value_type output_width = input_tiles_count_x * input_tile_width;
70 tensor::value_type output_height = input_layout.size.spatial[1] - input_offset.spatial[1];
72 tensor::value_type padd_x = 0;
73 tensor::value_type padd_y = (8 - ((output_height - 2) % 8)) % 8;
74 if (conv_output_width % output_tile_width != 0) //leftovers
76 output_width += 3; //one tile is 4 elements from which only 3 first are used to generate first output value
80 auto data_size = tensor{ input_layout.size.batch[0], input_layout.size.feature[0], output_width, output_height };
81 tensor upper_padd = tensor{ 0, 0, padd_x, padd_y };
82 return layout(odt, ofmt, data_size, padding{ { 0,0,0,0}, upper_padd.sizes() });
85 //transformation of weights from standard to winograd
86 if (ofmt == format::winograd_2x3_s1_weights || ofmt == format::winograd_2x3_s1_fused_weights)
88 CLDNN_ERROR_NOT_EQUAL(node.id(), "input_layout.size.spatial[0]", input_layout.size.spatial[0], "expected value", 3, "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3");
89 CLDNN_ERROR_NOT_EQUAL(node.id(), "input_layout.size.spatial[1]", input_layout.size.spatial[1], "expected value", 3, "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3");
91 return layout(odt, ofmt, tensor{ input_layout.size.batch[0], input_layout.size.feature[0], 4, 3 });
93 else if(ofmt == format::winograd_6x3_s1_fused_weights)
95 CLDNN_ERROR_NOT_EQUAL(node.id(), "input_layout.size.spatial[0]", input_layout.size.spatial[0], "expected value", 3, "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3");
96 CLDNN_ERROR_NOT_EQUAL(node.id(), "input_layout.size.spatial[1]", input_layout.size.spatial[1], "expected value", 3, "input for conversion to winograd_2x3_s1 weights format should have spatial size 3x3");
98 return layout(odt, ofmt, tensor{ input_layout.size.batch[0], input_layout.size.feature[0], 8, 3 });
101 //transformation of data from winograd to standard
102 if (ifmt == format::winograd_2x3_s1_data)
104 constexpr tensor::value_type output_tile_width = 2; //by definition of F(2,3)
105 constexpr tensor::value_type filter_width = 3; //by definition of F(2,3)
106 constexpr tensor::value_type filter_stride = 1; //by definition of format::winograd_2x3_s1_data (our assumption)
108 constexpr tensor::value_type input_tile_width = filter_width + (output_tile_width - 1) * filter_stride; //input tile should be large enought to hold data for computations of output tile (for given filter size and stride)
110 auto output_width = input_layout.size.spatial[0] / input_tile_width * output_tile_width;
111 if (input_layout.size.spatial[0] % input_tile_width != 0) //leftovers
112 ++output_width; //output tile is 2 by default, so we can have only 1 value as leftover
114 return layout(odt, ofmt, tensor{ input_layout.size.batch[0], input_layout.size.feature[0], output_width, input_layout.size.spatial[1] });
117 //transformation of weights from winograd to standard
118 if (ifmt == format::winograd_2x3_s1_weights || ifmt == format::winograd_2x3_s1_fused_weights || ifmt == format::winograd_6x3_s1_fused_weights)
120 CLDNN_ERROR_MESSAGE(node.id(), "Conversion of weights from winograd to standard domain is currently unsupported");
123 if(ofmt == format::bs_xs_xsv8_bsv8 || ofmt == format::bs_xs_xsv8_bsv16 || ofmt == format::bs_x_bsv16)
124 return layout(odt, ofmt, input_layout.size.transform(ofmt, 1), op);
126 return layout(odt, ofmt, input_layout.size, op);
129 std::string reorder_inst::to_string(reorder_node const& node)
131 auto desc = node.get_primitive();
132 auto mean = desc->mean;
133 auto node_info = node.desc_to_json();
134 auto& input = node.input();
136 std::stringstream primitive_description;
138 json_composite reorder_info;
139 reorder_info.add("input id", input.id());
140 reorder_info.add("mean", mean);
141 if (desc->subtract_per_feature.size() > 0)
143 reorder_info.add("subtract per feature", desc->subtract_per_feature);
146 node_info->add("reorder info", reorder_info);
147 node_info->dump(primitive_description);
149 return primitive_description.str();
152 reorder_inst::typed_primitive_inst(network_impl& network, reorder_node const& node)
153 : parent(network, node, !node.can_be_optimized())
155 if (node.can_be_optimized())
162 auto input_layout = node.input().get_output_layout();
163 auto output_layout = node.get_output_layout();
165 CLDNN_ERROR_LESS_THAN(node.id(), "Input dimension size", input_layout.size.raw.size(), "ouput dimension size", output_layout.size.raw.size(), "Input dimension < output dimension. Reorder primitive woks only with same dimension sizes (reorder) or when input > output (flatten).");
167 if (!argument.subtract_per_feature.empty())
169 CLDNN_ERROR_GREATER_THAN(node.id(), "Input feature dimension size", input_layout.size.feature.size(), "value", 1, "Subtracting values work only for formats that have feature dimension == 1");
170 CLDNN_ERROR_NOT_EQUAL(node.id(), "Input feature size[0]", static_cast<size_t>(input_layout.size.feature[0]), "argument subtract per feature size", argument.subtract_per_feature.size(), "Number of features/channels in input does not match the number of features/channels in values to subtract");
174 void reorder_inst::on_execute()
176 if (node.can_be_optimized())
180 void reorder_inst::reuse_input()
182 if (!node.can_be_optimized())
185 if (node.requires_reinterpret())
187 if (!_output || !_network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
188 _output = _network.get_engine().reinterpret_buffer(input_memory(), node.get_output_layout());
191 _output = &input_memory();