#include "internal/layers/SimpleArithmeticAddition.h"
#include "internal/layers/SimpleCastLayer.h"
#include "internal/layers/GenericFullyConnectedLayer.h"
+#include "internal/layers/PadLayer.h"
#include "util/matrix/IndexIterator.h"
#include "util/kernel/IndexIterator.h"
void visit(const ::internal::tflite::op::Floor::Node &node) override;
void visit(const ::internal::tflite::op::Split::Node &node) override;
void visit(const ::internal::tflite::op::RSQRT::Node &node) override;
+ void visit(const ::internal::tflite::op::Pad::Node &node) override;
private:
const ::internal::tflite::operand::Set &_ctx;
// NOTE Split has no actual operation!
}
+// Schedules a PadLayer stage for an NNAPI PAD operation.
+//
+// NOTE Only uniform padding (TOP = BOTTOM = LEFT = RIGHT) is supported for
+// now: a single value is read out of the paddings tensor and handed to
+// PadLayer as the border width.
+void Planner::visit(const ::internal::tflite::op::Pad::Node &node)
+{
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+  const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+  const ::internal::tflite::operand::Index paddings_index{node.param().paddings_index};
+
+  // Set Shape Constraints and TensorInfo for every operand this op touches
+  _builder.addShapeConstr(
+      ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+                              _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+  _builder.addShapeConstr(
+      ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+                              _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+  _builder.addShapeConstr(
+      paddings_index,
+      asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape()), _ctx.at(paddings_index).type(),
+                   _ctx.at(paddings_index).scale(), _ctx.at(paddings_index).zeroPoint()));
+
+  // Operation parameters captured by value into the deferred stage below
+  struct Param
+  {
+    int ofm_index;        // output feature map operand index
+    int ifm_index;        // input feature map operand index
+    int32_t padding_size; // uniform border width, in elements
+  };
+
+  Param param;
+
+  param.ofm_index = ofm_index.asInt();
+  param.ifm_index = ifm_index.asInt();
+
+  // The paddings tensor must be a compile-time constant
+  assert(_ctx.at(paddings_index).hasData());
+
+  // TODO: Currently we are supporting uniform padding for the tensor, so only a single
+  // value is being read. (TOP = BOTTOM = LEFT = RIGHT).
+  // Need to read padding values for all the sides (TOP, BOTTOM, LEFT & RIGHT)
+
+  const auto &padding_data = _ctx.at(paddings_index).data();
+  auto base = padding_data.base();
+  // NOTE(review): offset 3 picks a single int32 entry out of the paddings
+  // tensor; this assumes int32 padding data and uniform padding — confirm
+  // against the TODO above when per-side padding is implemented.
+  auto padsize = reinterpret_cast<const int *>(base) + 3;
+  param.padding_size = *padsize;
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+    auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+    auto fn = nnfw::make_unique<PadLayer>();
+
+    fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.padding_size);
+    builder.append("Pad", std::move(fn));
+  };
+
+  _builder.addStage(stage);
+}
+
+
class AllocationContext final : public IAllocationContext
{
public:
--- /dev/null
+#include <iostream>\r
+#include "PadLayer.h"\r
+#include <arm_compute/runtime/CL/CLScheduler.h>\r
+\r
+// Records the tensors and border width, grows the input tensor's padding\r
+// region so the border can physically fit, and sets up a CLFillBorder kernel\r
+// that will fill that border with a constant zero.\r
+void PadLayer::configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,\r
+                         unsigned int border_width)\r
+{\r
+  _input = input;\r
+  _output = output;\r
+  _border_width = border_width;\r
+  // NOTE(review): dimension(0) is stored as "height" and dimension(1) as\r
+  // "width"; the two are only ever used symmetrically (products, symmetric\r
+  // coordinates), so a swapped naming would not change behavior — confirm\r
+  // the intended axis order.\r
+  _output_height = _output->info()->dimension(0);\r
+  _output_width = _output->info()->dimension(1);\r
+\r
+  // Border pixels are filled with a constant zero value\r
+  uint8_t constant_border_value = 0;\r
+  ::arm_compute::PixelValue constant_pixel_value = ::arm_compute::PixelValue(constant_border_value);\r
+\r
+  // Reserve room in the input tensor's padding region for the border\r
+  unsigned int padding_size = _border_width;\r
+  input->info()->extend_padding(::arm_compute::PaddingSize{padding_size});\r
+  _fillborderkernel.configure(input, _border_width, ::arm_compute::BorderMode::CONSTANT,\r
+                              constant_pixel_value);\r
+}\r
+\r
+// Fills the input's border region, then widens the input's valid region so\r
+// the border counts as part of the tensor, and finally copies the result\r
+// into the output tensor.\r
+void PadLayer::run(void)\r
+{\r
+  _fillborderkernel.run();\r
+\r
+  // Shift the valid-region anchor up/left by the border width and extend the\r
+  // shape to the (already padded) output extent.\r
+  ::arm_compute::Coordinates coordinates =\r
+      ::arm_compute::Coordinates(-_border_width, -_border_width);\r
+  ::arm_compute::TensorShape new_tensor_shape =\r
+      ::arm_compute::TensorShape(_output_height, _output_width);\r
+\r
+  /* NOTE: The cl kernel fills the data in the borders(not in the tensor).\r
+           Once the tensor is received back at NNAPI, we are adjusting\r
+           the valid region in such a way that the padding becomes part of the tensor itself\r
+           and matches the size of output. */\r
+  _input->info()->set_valid_region(::arm_compute::ValidRegion(coordinates, new_tensor_shape));\r
+\r
+  /* NOTE: Since cl kernel does not have an argument for output tensor while NNAPI does.\r
+           We need to map the input (tensor that is passed to the cl kernel) back to\r
+           output. */\r
+\r
+  // TODO: Write a modified CLCopy kernel to do this job.\r
+  populateOutput();\r
+}\r
+\r
+// Copies the padded input (border included) into the output tensor through\r
+// mapped host pointers. Both tensors are unmapped before returning.\r
+void PadLayer::populateOutput()\r
+{\r
+  auto &queue = ::arm_compute::CLScheduler::get().queue();\r
+  _input->map(queue);\r
+  _output->map(queue);\r
+\r
+  auto input_tensor = static_cast<::arm_compute::ITensor *>(_input);\r
+  auto const source_data = input_tensor->buffer();\r
+\r
+  auto output_tensor = static_cast<::arm_compute::ITensor *>(_output);\r
+  auto dst_data = output_tensor->buffer();\r
+\r
+  // Use the tensor's actual element size instead of a hard-coded 4 bytes so\r
+  // non-32-bit data types are copied in full.\r
+  // NOTE(review): the extent is height * width only — assumes a 2-D tensor;\r
+  // confirm for higher-rank inputs.\r
+  const size_t copy_bytes =\r
+      static_cast<size_t>(_output_height) * _output_width * _output->info()->element_size();\r
+  memmove(dst_data, source_data, copy_bytes);\r
+\r
+  _input->unmap(queue);\r
+  _output->unmap(queue);\r
+}\r
--- /dev/null
+#ifndef __PAD_LAYER_H__\r
+#define __PAD_LAYER_H__\r
+\r
+// Include IFunction directly instead of relying on a transitive include.\r
+#include <arm_compute/runtime/IFunction.h>\r
+#include <arm_compute/runtime/CL/CLTensor.h>\r
+#include <arm_compute/runtime/CL/functions/CLFillBorder.h>\r
+\r
+// Pads a CL tensor with a uniform, zero-valued border on every side and\r
+// copies the padded result into the output tensor.\r
+class PadLayer : public ::arm_compute::IFunction\r
+{\r
+public:\r
+  // input        : tensor to be padded\r
+  // output       : tensor that receives the padded result\r
+  // border_width : uniform padding applied on every side, in elements\r
+  void configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,\r
+                 unsigned int border_width);\r
+  void run(void) override;\r
+\r
+private:\r
+  // Members are brace-initialized so a default-constructed layer is in a\r
+  // well-defined state before configure() runs.\r
+  ::arm_compute::ICLTensor *_input{nullptr};\r
+  ::arm_compute::ICLTensor *_output{nullptr};\r
+  int _border_width{0};\r
+  int _output_height{0};\r
+  int _output_width{0};\r
+\r
+  ::arm_compute::CLFillBorder _fillborderkernel;\r
+\r
+  // Copies the padded input into the output buffer via mapped host pointers.\r
+  void populateOutput();\r
+};\r
+\r
+#endif // __PAD_LAYER_H__\r
#include "internal/op/Floor.h"
#include "internal/op/Split.h"
#include "internal/op/RSQRT.h"
+#include "internal/op/Pad.h"
namespace internal
{
virtual void visit(const Floor::Node &) = 0;
virtual void visit(const Split::Node &) = 0;
virtual void visit(const RSQRT::Node &) = 0;
+ virtual void visit(const Pad::Node &) = 0;
};
} // namespace op
--- /dev/null
+#include "internal/op/Pad.h"\r
+#include "internal/op/NodeVisitor.h"\r
+\r
+#include <cassert>\r
+\r
+namespace internal\r
+{\r
+namespace tflite\r
+{\r
+namespace op\r
+{\r
+namespace Pad\r
+{\r
+\r
+// Double-dispatch entry point: forwards this node to the visitor.\r
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }\r
+\r
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,\r
+             const uint32_t *outputs)\r
+{\r
+  assert(inputCount == 2 && outputCount == 1);\r
+\r
+  // Output 0 is the padded tensor\r
+  ofm_index = outputs[0];\r
+\r
+  // Each input should be interpreted as follows:\r
+  //\r
+  //   0 -> input Tensor Index\r
+  //   1 -> paddings\r
+  ifm_index = inputs[0];\r
+  paddings_index = inputs[1];\r
+}\r
+\r
+} // namespace Pad\r
+} // namespace op\r
+} // namespace tflite\r
+} // namespace internal\r
--- /dev/null
+#ifndef __INTERNAL_OP_PAD_H__\r
+#define __INTERNAL_OP_PAD_H__\r
+\r
+#include "internal/op/Node.h"\r
+\r
+#include <cstdint>\r
+\r
+namespace internal\r
+{\r
+namespace tflite\r
+{\r
+namespace op\r
+{\r
+namespace Pad\r
+{\r
+\r
+// Operand indices for the PAD operation\r
+struct Param\r
+{\r
+  int32_t ifm_index;      // input tensor\r
+  int32_t paddings_index; // paddings tensor (compile-time constant)\r
+  int32_t ofm_index;      // output (padded) tensor\r
+\r
+  Param() = default;\r
+  // Decodes the NN API operand arrays (expects 2 inputs, 1 output)\r
+  Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);\r
+};\r
+\r
+// IR node for the PAD operation\r
+class Node final : public op::Node\r
+{\r
+public:\r
+  Node(const Param &param) : _param(param)\r
+  {\r
+    // DO NOTHING\r
+  }\r
+\r
+public:\r
+  virtual ~Node() = default;\r
+\r
+public:\r
+  const Param &param(void) const { return _param; }\r
+\r
+public:\r
+  void accept(NodeVisitor &&) const override;\r
+\r
+private:\r
+  const Param _param;\r
+};\r
+\r
+} // namespace Pad\r
+} // namespace op\r
+} // namespace tflite\r
+} // namespace internal\r
+\r
+#endif // __INTERNAL_OP_PAD_H__\r
break;
}
+    case ANEURALNETWORKS_PAD:
+    {
+      // PAD: 2 inputs (tensor, paddings), 1 output (padded tensor)
+      assert(inputCount == 2 && outputCount == 1);
+
+      using internal::tflite::op::Pad::Param;
+      using internal::tflite::op::Pad::Node;
+
+      // Add 'operations'
+      auto &operations = model->deref().operations();
+
+      // Param decodes the operand index arrays; Node stores them in the IR
+      operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+      break;
+    }
default:
throw std::runtime_error{"Not supported operation"};
};