[Pure ACL NN Runtime] Implement basic compilation (#596)
author Jonghyun Park / Motion Control Lab (SR) / Senior Engineer / Samsung Electronics <jh1302.park@samsung.com>
Thu, 12 Apr 2018 00:14:05 +0000 (09:14 +0900)
committer Sangmin Seo / Motion Control Lab (SR) / Senior Engineer / Samsung Electronics <sangmin7.seo@samsung.com>
Thu, 12 Apr 2018 00:14:05 +0000 (09:14 +0900)
This commit implements the basic compilation phase for the (experimental)
pure ACL NN runtime.
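
Compilation runs in two passes: visiting each operation records per-operand
shape constraints and a deferred configuration stage, and finalization then
creates the CL tensors, runs the stages to configure the ACL functions, and
allocates tensor memory. A minimal sketch of how a client drives this path
through the NNAPI surface (model construction and error checking elided;
'model' is assumed to be an already-populated ANeuralNetworksModel):

  ANeuralNetworksCompilation *compilation = nullptr;

  // Wrap the model in a compilation handle holding an (empty) execution plan
  ANeuralNetworksCompilation_create(model, &compilation);

  // Build the plan: visit operations, create CL tensors, configure functions
  ANeuralNetworksCompilation_finish(compilation);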

Signed-off-by: Jonghyun Park <jh1302.park@samsung.com>
tools/nnapi_bindings/bindings/pure_arm_compute/src/compilation.cc
tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/arm_compute.h

index d046f8b..3ff2040 100644 (file)
@@ -1,8 +1,278 @@
 #include <nnapi.h>
 
+#include <arm_compute/core/CL/ICLTensor.h>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h>
+
+#include <functional>
+#include <map>
+#include <memory>
+#include <vector>
+
 #include "compilation.h"
 #include "model.h"
 
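+// Helpers that translate nnfw shape descriptions into ACL tensor descriptions
+//
+// NOTE All operands are assumed to be F32 for now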
+::arm_compute::TensorShape asTensorShape(const nnfw::util::feature::Shape &shape)
+{
+  return ::arm_compute::TensorShape(shape.W, shape.H, shape.C, 1);
+}
+
+::arm_compute::TensorShape asTensorShape(const nnfw::util::kernel::Shape &shape)
+{
+  return ::arm_compute::TensorShape(shape.W, shape.H, shape.C, shape.N);
+}
+
+::arm_compute::TensorInfo asTensorInfo(const nnfw::util::feature::Shape &shape)
+{
+  return ::arm_compute::TensorInfo(asTensorShape(shape), 1, ::arm_compute::DataType::F32);
+}
+
+::arm_compute::TensorInfo asTensorInfo(const nnfw::util::kernel::Shape &shape)
+{
+  return ::arm_compute::TensorInfo(asTensorShape(shape), 1, ::arm_compute::DataType::F32);
+}
+
+::arm_compute::TensorInfo asTensorInfo(int32_t size)
+{
+  return ::arm_compute::TensorInfo(::arm_compute::TensorShape(size), 1, ::arm_compute::DataType::F32);
+}
+
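+// Maps an operand index to the CL tensor allocated for it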
+struct IAllocationContext
+{
+  virtual ~IAllocationContext() = default;
+
+  virtual ::arm_compute::ICLTensor *at(const ::internal::tflite::operand::Index &ind) const = 0;
+};
+
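+// Collects configured ACL functions in execution order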
+struct IExecutionBuilder
+{
+  virtual ~IExecutionBuilder() = default;
+
+  virtual void append(std::unique_ptr<::arm_compute::IFunction> &&f) = 0;
+};
+
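+// A Stage configures one ACL function; stages are deferred until every CL tensor is created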
+using Stage = std::function<void (const IAllocationContext &, IExecutionBuilder &)>;
+
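+// Accumulates shape constraints and stages while the model's operations are visited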
+struct IPlanBuilder
+{
+  virtual ~IPlanBuilder() = default;
+
+  virtual void addShapeConstr(const ::internal::tflite::operand::Index &ind,
+                              const ::arm_compute::TensorInfo &info) = 0;
+
+  virtual void addStage(const Stage &) = 0;
+};
+
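+// Visits each operation in the model, registering its shape constraints and a
+// stage that configures the corresponding ACL function
+//
+// NOTE Only implicitly-padded Conv2D is supported at this point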
+class Planner : public ::internal::tflite::op::NodeVisitor
+{
+public:
+  Planner(const ::internal::tflite::operand::Set &ctx, IPlanBuilder &builder)
+      : _ctx{ctx}, _builder{builder}
+  {
+    // DO NOTHING
+  }
+
+public:
+  void visit(const ::internal::tflite::op::Conv2D::implicit::Node &node) override;
+
+private:
+  const ::internal::tflite::operand::Set &_ctx;
+  IPlanBuilder &_builder;
+};
+
+void Planner::visit(const ::internal::tflite::op::Conv2D::implicit::Node &node)
+{
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+  const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+  const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+  const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ker_shape = _ctx.at(ker_index).shape().asKernel();
+  const auto bias_size = _ctx.at(bias_index).shape().asVector();
+
+  // Set Shape Constraints
+  _builder.addShapeConstr(ofm_index, asTensorInfo(ofm_shape));
+  _builder.addShapeConstr(ifm_index, asTensorInfo(ifm_shape));
+  _builder.addShapeConstr(ker_index, asTensorInfo(ker_shape));
+  _builder.addShapeConstr(bias_index, asTensorInfo(bias_size));
+
+  // TODO Set initializer for kernel and bias
+
+  // Construct operation parameters
+  struct Padding
+  {
+    uint32_t top;
+    uint32_t bottom;
+    uint32_t left;
+    uint32_t right;
+  };
+
+  struct Stride
+  {
+    uint32_t vertical;
+    uint32_t horizontal;
+  };
+
+  struct Param
+  {
+    int ofm_index;
+    int ifm_index;
+    int ker_index;
+    int bias_index;
+
+    Padding padding;
+    Stride stride;
+
+    // TODO Add 'activation' field
+  };
+
+  Param param;
+
+  param.ofm_index = ofm_index.asInt();
+  param.ifm_index = ifm_index.asInt();
+  param.ker_index = ker_index.asInt();
+  param.bias_index = bias_index.asInt();
+
+  // TODO Get padding, stride, and activation from model
+  param.padding.top = 0;
+  param.padding.bottom = 0;
+  param.padding.left = 0;
+  param.padding.right = 0;
+
+  // NOTE Use stride 1 rather than 0 as the placeholder; a zero stride is
+  // rejected by ACL when the convolution function is configured
+  param.stride.vertical = 1;
+  param.stride.horizontal = 1;
+
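+  // The lambda below captures 'param' by value and runs at PlanBuilder::finalize()
+  // time, once every CL tensor has been created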
+  auto stage = [param] (const IAllocationContext &ctx, IExecutionBuilder &builder)
+  {
+    auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+    auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+    auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+    auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
+    const ::arm_compute::PadStrideInfo conv_info{param.stride.horizontal, param.stride.vertical,
+                                                 param.padding.left, param.padding.right,
+                                                 param.padding.top, param.padding.bottom,
+                                                 ::arm_compute::DimensionRoundingType::FLOOR};
+
+    std::unique_ptr<::arm_compute::CLConvolutionLayer> fn{new ::arm_compute::CLConvolutionLayer};
+
+    fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info);
+
+    builder.append(std::move(fn));
+  };
+
+  _builder.addStage(stage);
+}
+
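+// IAllocationContext that looks tensors up in the plan's operand context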
+class AllocationContext final : public IAllocationContext
+{
+public:
+  AllocationContext(::internal::arm_compute::Plan &plan) : _plan{plan}
+  {
+    // DO NOTHING
+  }
+
+public:
+  ::arm_compute::ICLTensor *at(const ::internal::tflite::operand::Index &ind) const override
+  {
+    return _plan.operands().at(ind).ptr();
+  }
+
+private:
+  ::internal::arm_compute::Plan &_plan;
+};
+
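+// IExecutionBuilder that appends each configured function to the plan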
+class ExecutionBuilder final : public IExecutionBuilder
+{
+public:
+  ExecutionBuilder(::internal::arm_compute::Plan &plan) : _plan{plan}
+  {
+    // DO NOTHING
+  }
+
+public:
+  void append(std::unique_ptr<::arm_compute::IFunction> &&f) override
+  {
+    _plan.operations().append(std::move(f));
+  }
+
+private:
+  ::internal::arm_compute::Plan &_plan;
+};
+
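+// IPlanBuilder that records constraints and stages, and builds the plan on finalize()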
+class PlanBuilder final : public IPlanBuilder
+{
+public:
+  PlanBuilder(::internal::arm_compute::Plan &plan) : _plan{plan}
+  {
+    // DO NOTHING
+  }
+
+public:
+  void addShapeConstr(const ::internal::tflite::operand::Index &ind,
+                      const ::arm_compute::TensorInfo &info) override;
+
+public:
+  void addStage(const Stage &stage) override;
+
+public:
+  void finalize(void) const;
+
+private:
+  ::internal::arm_compute::Plan &_plan;
+
+private:
+  std::map<int, ::arm_compute::TensorInfo> _tensor_info_ctx;
+  std::vector<Stage> _stages;
+};
+
+void PlanBuilder::addShapeConstr(const ::internal::tflite::operand::Index &ind,
+                                 const ::arm_compute::TensorInfo &info)
+{
+  _tensor_info_ctx[ind.asInt()] = info;
+}
+
+void PlanBuilder::addStage(const Stage &stage) { _stages.emplace_back(stage); }
+
+void PlanBuilder::finalize(void) const
+{
+  // CLTensor objects to be initialized later
+  std::vector<std::shared_ptr<::arm_compute::CLTensor>> tensors;
+
+  // Create CLTensor
+  for (const auto &entry : _tensor_info_ctx)
+  {
+    auto tensor = std::make_shared<::arm_compute::CLTensor>();
+
+    tensor->allocator()->init(entry.second);
+
+    // NOTE Do NOT allocate here; allocation must happen after every function is configured
+    _plan.operands().set(::internal::tflite::operand::Index{entry.first}, tensor);
+    tensors.emplace_back(tensor);
+  }
+
+  // Process Stage
+  AllocationContext allocation_context{_plan};
+  ExecutionBuilder execution_builder{_plan};
+
+  for (const auto &stage : _stages)
+  {
+    stage(allocation_context, execution_builder);
+  }
+
+  // Allocate Tensor Memory
+  for (const auto &tensor : tensors)
+  {
+    tensor->allocator()->allocate();
+  }
+
+  // TODO Fill weight/bias
+}
+
+//
+// NNAPI Implementation
+//
 ResultCode
 ANeuralNetworksCompilation_create(ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation)
 {
@@ -18,5 +288,19 @@ ANeuralNetworksCompilation_create(ANeuralNetworksModel* model, ANeuralNetworksCo
 ResultCode
 ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation* compilation)
 {
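+  // Set up the default OpenCL context and command queue before any CL function
+  // is configured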
+  arm_compute::CLScheduler::get().default_init();
+
+  const auto &operands = compilation->plan().model().operands();
+  const auto &operations = compilation->plan().model().operations();
+
+  PlanBuilder plan_builder{compilation->plan()};
+
+  for (uint32_t n = 0; n < operations.size(); ++n)
+  {
+    operations.at(n).accept(Planner{operands, plan_builder});
+  }
+
+  plan_builder.finalize();
+
   return ANEURALNETWORKS_NO_ERROR;
 }
index 407d99c..7c49e0d 100644 (file)
@@ -46,12 +46,19 @@ namespace operand
 class Context
 {
 public:
-  Context &set(const ::internal::tflite::operand::Index &id,
+  Context &set(const ::internal::tflite::operand::Index &ind,
                const std::shared_ptr<::arm_compute::ICLTensor> &tensor);
 
 public:
-  const Object &at(const ::internal::tflite::operand::Index &) const;
-  Object &at(const ::internal::tflite::operand::Index &);
+  const Object &at(const ::internal::tflite::operand::Index &ind) const
+  {
+    return _objects.at(ind.asInt());
+  }
+
+  Object &at(const ::internal::tflite::operand::Index &ind)
+  {
+    return _objects.at(ind.asInt());
+  }
 
 private:
   std::map<int, Object> _objects;