2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "KernelGenerator.h"
19 #include "ops/ConvolutionLayer.h"
20 #include "ops/DepthwiseConvolutionLayer.h"
21 #include "ops/FullyConnectedLayer.h"
23 #include <backend/Backend.h>
24 #include <backend/IConfig.h>
26 #include <util/Utils.h>
27 #include <util/logging.h>
28 #include <exec/DynamicShapeInferer.h>
// Constructs the xnnpack kernel generator.
// The initializer list stores the operand context, operation context, tensor builder,
// tensor registry, custom kernel builder and external context in the matching members,
// and starts _current_layout at ir::Layout::UNKNOWN.
39 KernelGenerator::KernelGenerator(
40 const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
41 const std::shared_ptr<TensorBuilder> &tensor_builder,
42 const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
43 const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
44 const std::shared_ptr<ExternalContext> &external_context)
45 : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
46 _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
47 _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
// Builds the exec::FunctionSequence for one op sequence and leaves it in _return_fn_seq.
// A dynamic-tensor context is attached to the sequence first; then, for every operation
// index in op_seq, the function obtained from releaseFunction() is appended and each
// input/output tensor of that operation is looked up in the tensor registry.
52 void KernelGenerator::visit(const ir::OpSequence &op_seq)
// Preconditions: no function sequence is pending from an earlier visit, and the tensor
// builder can supply a dynamic tensor manager (it is stored in dyn_ctx below).
54 assert(!_return_fn_seq);
55 assert(_tensor_builder->dynamicTensorManager());
// The shape inferer is created from the operand context and tensor registry, and is
// moved into the dynamic-tensor context further down.
58 auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
60 _return_fn_seq = std::make_unique<exec::FunctionSequence>();
62 // Prepare to handle dynamic tensors later
63 auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
// The context keeps raw pointers to op_seq and _operations_ctx.
// NOTE(review): presumably both must outlive the function sequence — confirm.
65 dyn_ctx->op_seq = &op_seq;
66 dyn_ctx->operations = &_operations_ctx;
67 dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
68 dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
70 _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
// Record the sequence layout; the per-operation visit() overloads read _current_layout
// when interpreting feature shapes.
73 _current_layout = op_seq.getLayout();
74 for (const auto &operation_idx : op_seq.operations())
76 const auto &node = _operations_ctx.at(operation_idx);
// NOTE(review): no statement that dispatches `node` to a visit() overload is visible
// between the lookup above and releaseFunction() below — confirm against the full file
// that _return_fn has been populated for this node before it is released.
78 _return_fn_seq->append(releaseFunction());
// Walk the node's inputs combined with its outputs.
// NOTE(review): `| ir::Remove::UNDEFINED` presumably drops undefined input indices — confirm.
80 for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
82 auto portable_tensor = _tensor_reg->getPortableTensor(ind);
// Debug-only check that the tensor is laid out as NHWC.
// NOTE(review): no null check on portable_tensor (or on `tensor` below) is visible
// before the dereference — confirm whether the registry can return nullptr here.
85 assert(portable_tensor->layout() == ir::Layout::NHWC);
88 auto tensor = _tensor_reg->getNativeTensor(ind);
// Calls increase_ref() on the backend-native tensor for each use by this operation.
91 tensor->increase_ref();
// Generates the kernel for a Conv2D node: creates an ops::ConvolutionLayer, configures it
// with the node's tensors, padding, stride, dilation and activation, and stores it in
// _return_fn.
97 void KernelGenerator::visit(const ir::operation::Conv2D &node)
99 using ir::operation::Conv2D;
// Operand indices: the single output plus the input, kernel and bias inputs.
101 const auto ofm_index{node.getOutputs().at(0)};
102 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
103 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
104 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
// Resolve each operand index to its portable tensor through the tensor registry.
106 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
107 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
108 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
109 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
// Operation parameters are copied out of the node.
111 const auto stride = node.param().stride;
112 const auto activation = node.param().activation;
113 const auto param_padding = node.param().padding;
114 const auto dilation = node.param().dilation;
115 auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);
// Input/output shapes are read as feature maps under _current_layout, which
// visit(const ir::OpSequence &) sets before operations are processed.
117 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
118 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
119 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
120 const auto &ker_shape = _ctx.at(ker_index).shape();
// Per the kernel format above, dim 1 is the kernel height and dim 2 the kernel width.
121 const auto ker_height = ker_shape.dim(1);
122 const auto ker_width = ker_shape.dim(2);
// NOTE(review): `padding` is used in configure() below, but the declaration that receives
// this calculatePadding() result is not visible here — confirm against the full file.
125 ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
126 dilation.width_factor, dilation.height_factor);
// configure() receives both the padding type from the node and the explicit
// left/right/top/bottom values from `padding`.
128 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
129 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
130 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
// Transfer ownership of the configured layer to _return_fn.
132 _return_fn = std::move(fn);
// Generates the kernel for a DepthwiseConv2D node: computes explicit padding, creates an
// ops::DepthwiseConvolutionLayer, configures it with the node's tensors and parameters,
// and stores it in _return_fn.
135 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
137 using ir::operation::DepthwiseConv2D;
// Operand indices: the single output plus the input, kernel and bias inputs.
139 const auto ofm_index{node.getOutputs().at(0)};
140 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
141 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
142 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
144 const auto stride = node.param().stride;
// Input/output shapes are read as feature maps under _current_layout, which
// visit(const ir::OpSequence &) sets before operations are processed.
145 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
146 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
147 // Kernel format is [1, kernel_height, kernel_width, depth_out].
148 const auto &ker_shape = _ctx.at(ker_index).shape();
// Per the kernel format above, dim 1 is the kernel height and dim 2 the kernel width.
149 const auto ker_height = ker_shape.dim(1);
150 const auto ker_width = ker_shape.dim(2);
151 const auto dilation_width = node.param().dilation.width_factor;
152 const auto dilation_height = node.param().dilation.height_factor;
153 const auto param_padding = node.param().padding;
// Explicit padding is derived from the padding parameter, the feature shapes, the stride,
// the kernel extent and the dilation factors.
154 const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width,
155 ker_height, dilation_width, dilation_height);
156 const auto multiplier = node.param().multiplier;
157 const auto activation = node.param().activation;
// Resolve each operand index to its portable tensor through the tensor registry.
159 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
160 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
161 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
162 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
164 auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context);
// configure() receives both the padding type from the node and the explicit
// left/right/top/bottom values; `multiplier` is passed before the dilation factors.
166 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
167 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
168 multiplier, dilation_width, dilation_height, activation, ofm_tensor);
// Transfer ownership of the configured layer to _return_fn.
170 _return_fn = std::move(fn);
// Generates the kernel for a FullyConnected node: creates an ops::FullyConnectedLayer,
// configures it with the input, weight, optional bias, activation and output, and stores
// it in _return_fn.
173 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
175 using ir::operation::FullyConnected;
// Operand indices: the single output plus the input, weight and bias inputs.
177 const auto output_index{node.getOutputs().at(0)};
178 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
179 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
180 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
181 const auto activation = node.param().activation;
// Resolve each operand index to its portable tensor through the tensor registry.
183 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
184 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
185 auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
// The bias is optional: an undefined bias index yields a null bias tensor instead of a
// registry lookup.
186 auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
188 auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
190 fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor);
// Transfer ownership of the configured layer to _return_fn.
192 _return_fn = std::move(fn);
195 } // namespace xnnpack
196 } // namespace backend