Imported Upstream version 1.8.0
[platform/core/ml/nnfw.git] / runtime / onert / backend / cpu / ops / FullyConnectedLayer.cc
index c00be64..05da33a 100644
@@ -18,6 +18,8 @@
 
 #include "../Tensor.h"
 #include <cker/operation/FullyConnected.h>
+#include <cker/TensorUtils.h>
+#include <misc/polymorphic_downcast.h>
 
 namespace onert
 {
@@ -31,7 +33,7 @@ namespace ops
 FullyConnectedLayer::FullyConnectedLayer()
     : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
       _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
-      _is_hybrid(false)
+      _external_context(nullptr), _is_hybrid(false)
 {
   // DO NOTHING
 }
@@ -102,7 +104,8 @@ void FullyConnectedLayer::fullyConnectedHybrid()
       op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
       getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
       getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
-      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+      _external_context->ruy_context());
 #else
   nnfw::cker::FullyConnectedHybrid(
       op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
@@ -110,31 +113,67 @@ void FullyConnectedLayer::fullyConnectedHybrid()
       (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
                         : reinterpret_cast<const int8_t *>(_weights->buffer()),
       getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
-      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+      _external_context->ruy_context());
 
-// TODO Enable calling decrease_ref
-#if 0
   if (_cached_weights == nullptr || _is_weights_freed)
     return;
 
-  auto weight_tensor = dynamic_cast<const Tensor *>(_weights);
-  if (weight_tensor)
+  // Reaching this point ('_cached_weights' is not nullptr and '_is_weights_freed' is false)
+  // means this weight shape satisfies the condition for the ruy kernel's prepack cache.
+  // Once execution gets past this point it will not come back, except in the case below.
+
+  // If the input is filled with zeros, the kernel bypasses the ruy path (no prepacking
+  // happens), so handle that case here and retry on a later run.
+  const int input_size = getTensorShape(_input).FlatSize();
+  if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+    return;
+
+  auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
+
+  // This weight tensor could be shared with other ops as a constant tensor.
+  // Therefore, check its reference count as follows before releasing it.
+  auto tensor = const_cast<Tensor *>(weight_tensor);
+  if (tensor->buffer() == nullptr) // ref is already 0?
   {
-    auto tensor = const_cast<Tensor *>(weight_tensor);
+    _is_weights_freed = true;
+    return;
+  }
 
-    tensor->decrease_ref();
-    if (tensor->buffer() == nullptr) // ref == 0?
-    {
-      _is_weights_freed = true;
-    }
+  tensor->decrease_ref();
+  if (tensor->buffer() == nullptr) // ref == 0?
+  {
+    _is_weights_freed = true;
   }
-#endif // if 0
 #endif
 }
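
For context on the hybrid path that just ended: it multiplies a float input by symmetric int8 weights, quantizing the input on the fly. The sketch below (illustrative names, not onert's API; the real nnfw::cker::FullyConnectedHybrid also handles batching, bias, activation, the temp arena, and ruy prepacking) shows the core idea, including the zero-vector early-out that the comments above work around:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Minimal scalar sketch of a hybrid (dynamic-range) fully connected layer:
    // quantize the float input to int8, accumulate in int32 against symmetric
    // int8 weights, then rescale the accumulator back to float.
    void HybridFullyConnectedSketch(const std::vector<float> &input,
                                    const std::vector<int8_t> &weights, // [out][in], row-major
                                    float weights_scale, int in_size, int out_size,
                                    std::vector<float> &output)
    {
      float max_abs = 0.f;
      for (float v : input)
        max_abs = std::max(max_abs, std::fabs(v));
      if (max_abs == 0.f) // zero-vector input: the real kernel bypasses ruy here
      {
        std::fill(output.begin(), output.end(), 0.f);
        return;
      }

      // Per-call input scale: map max |x| onto the int8 range.
      const float input_scale = max_abs / 127.f;
      std::vector<int8_t> quant_input(in_size);
      for (int i = 0; i < in_size; ++i)
        quant_input[i] = static_cast<int8_t>(std::lround(input[i] / input_scale));

      for (int o = 0; o < out_size; ++o)
      {
        int32_t acc = 0;
        for (int i = 0; i < in_size; ++i)
          acc += static_cast<int32_t>(weights[o * in_size + i]) * quant_input[i];
        output[o] = acc * input_scale * weights_scale;
      }
    }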
 
+void FullyConnectedLayer::fullyConnectedSparseWeight()
+{
+  float output_activation_min = 0, output_activation_max = 0;
+  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+  nnfw::cker::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+  op_params.activation = convertActivationType(_activation);
+
+  int w0_size = getTensorShape(_weights).Dims(0);
+  const uint16_t *w1_segments = _weights->w1_segments();
+  const uint16_t *w1_indices = _weights->w1_indices();
+
+  nnfw::cker::FullyConnectedSparseWeight(
+      op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+      getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+      getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
+      w1_indices);
+}
+
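
Assuming w1_segments and w1_indices follow the usual CSR convention (per-row ranges into the value array, plus the column index of each stored value), the core of the sparse path added above is conceptually the loop below. This is only a sketch; the actual cker kernel additionally applies bias and the activation range:

    #include <cstdint>
    #include <vector>

    // CSR-style sparse matrix-vector product: for output row r, the stored
    // nonzero weights occupy [w1_segments[r], w1_segments[r + 1]) in
    // 'weights_data', and w1_indices[k] is the input (column) index of the
    // k-th stored value. w1_segments must have out_size + 1 entries.
    void SparseFullyConnectedSketch(const std::vector<float> &input,
                                    const std::vector<float> &weights_data,
                                    const std::vector<uint16_t> &w1_segments,
                                    const std::vector<uint16_t> &w1_indices,
                                    int out_size, std::vector<float> &output)
    {
      for (int r = 0; r < out_size; ++r)
      {
        float acc = 0.f;
        for (uint16_t k = w1_segments[r]; k < w1_segments[r + 1]; ++k)
          acc += weights_data[k] * input[w1_indices[k]];
        output[r] = acc;
      }
    }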
 void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
                                     const IPortableTensor *bias, ir::Activation activation,
-                                    IPortableTensor *output)
+                                    IPortableTensor *output,
+                                    const std::shared_ptr<ExternalContext> &external_context)
 {
   _input = input;
   _weights = weights;
@@ -143,6 +182,7 @@ void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortabl
   _output = output;
   _is_hybrid = input->data_type() == OperandType::FLOAT32 &&
                weights->data_type() == OperandType::QUANT_INT8_SYMM;
+  _external_context = external_context;
 }
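
The new external_context parameter lets every layer share a single ruy context (thread pool plus prepacked-weight cache) rather than each layer building its own. A hypothetical sketch of such a holder follows; ExternalContextSketch and its members are illustrative only, not onert's actual ExternalContext:

    #include <memory>
    #include <ruy/context.h>

    // Illustrative holder: one ruy::Context shared across all layers of a
    // backend, so thread pools and prepacked weights are reused.
    class ExternalContextSketch
    {
    public:
      ruy::Context *ruy_context() { return _ruy_context.get(); }

    private:
      std::unique_ptr<ruy::Context> _ruy_context = std::make_unique<ruy::Context>();
    };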
 
 void FullyConnectedLayer::run()
@@ -151,6 +191,10 @@ void FullyConnectedLayer::run()
   {
     fullyConnectedHybrid();
   }
+  else if (_weights->is_sparse())
+  {
+    fullyConnectedSparseWeight();
+  }
   else if (_input->data_type() == OperandType::FLOAT32)
   {
     fullyConnectedFloat32();
@@ -167,7 +211,16 @@ void FullyConnectedLayer::run()
 
 void FullyConnectedLayer::prepare()
 {
-#ifdef USE_RUY_GEMV
+  if (_bias && _bias->is_constant())
+  {
+    const int bias_size = getTensorShape(_bias).FlatSize();
+    if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+    {
+      _bias = nullptr;
+    }
+  }
+
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
   // TODO This is a workaround
   // Only the hybrid fully connected path uses the ruy kernel
   if (_input->data_type() != OperandType::FLOAT32 ||
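
The bias pruning added to prepare() leans on nnfw::cker::IsZeroVector. Conceptually it is just the scalar loop below (a simplified sketch; the cker implementation also provides a NEON-vectorized path):

    // Simplified scalar equivalent of the zero-vector test used above:
    // returns true iff every element of 'vector' is exactly 0.0f.
    inline bool IsZeroVectorSketch(const float *vector, int v_size)
    {
      for (int i = 0; i < v_size; ++i)
        if (vector[i] != 0.f)
          return false;
      return true;
    }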