Fix fully connected test failure (#2263)
author: PRAVEEN DORESWAMY NAIDU / System SW / SRI-Bangalore / Staff Engineer / Samsung Electronics <praveen.dn@samsung.com>
Tue, 21 Aug 2018 09:59:08 +0000 (15:29 +0530)
committer: Sharan Allur / System SW / SRI-Bangalore / Staff Engineer / Samsung Electronics <sharan.allur@samsung.com>
Tue, 21 Aug 2018 09:59:08 +0000 (15:29 +0530)
The GoogleTest case for fully connected with float operands is failing.
The failure occurs for rank-2 input operands and appears to be caused by
improper handling of batches: the previous code assumed the input is always
a 4-D feature map, so a rank-2 [batches, input_size] input was not shaped
correctly.

Signed-off-by: praveen D N <praveen.dn@samsung.com>
runtimes/pure_arm_compute/src/compilation.cc
runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h

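For context, here is a minimal standalone sketch of the batch derivation this patch introduces (plain integer dims stand in for the runtime's operand shapes; count_elements is a hypothetical helper, not part of the codebase):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Stand-in for iterating _ctx.at(input_index).shape().dim(i) over the rank.
    static uint32_t count_elements(const std::vector<uint32_t> &dims)
    {
      uint32_t n = 1;
      for (const auto d : dims)
        n *= d;
      return n;
    }

    int main()
    {
      // Rank-2 input [batches, input_size] fed to a [num_output, input_size] weight.
      const std::vector<uint32_t> input_dims{3, 8}; // 3 batches of 8 elements each
      const uint32_t input_size = 8;                // weight shape dim(1)

      // The fix: batches fall out of total element count / weight row width,
      // so rank-2 inputs no longer need to be forced into a feature-map shape.
      const uint32_t batches = count_elements(input_dims) / input_size;
      assert(batches == 3);
      return 0;
    }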
runtimes/pure_arm_compute/src/compilation.cc
index 074bf32..9f7615e 100644
@@ -1934,28 +1934,55 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
 
   const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
 
+  assert(_ctx.at(input_index).shape().rank() >= 2);
   assert(_ctx.at(output_index).shape().rank() == 2);
-  const auto output_size = _ctx.at(output_index).shape().dim(1);
 
-  // NOTE We assume that input is a feature map
-  // TODO Remove this restriction!
-  const auto ifm_shape = _ctx.at(input_index).shape().asFeature();
+  auto no_of_input_elements = 1;
+  for (size_t i = 0; i < _ctx.at(input_index).shape().rank(); i++)
+  {
+    no_of_input_elements *= _ctx.at(input_index).shape().dim(i);
+  }
 
+  const auto output_size = _ctx.at(output_index).shape().dim(1);
   assert(_ctx.at(weight_index).shape().rank() == 2);
   const auto num_output = _ctx.at(weight_index).shape().dim(0);
   const auto input_size = _ctx.at(weight_index).shape().dim(1);
-  assert(ifm_shape.C * ifm_shape.H * ifm_shape.W == input_size);
 
+  uint32_t C, N, H, W;
+  if (_ctx.at(input_index).shape().rank() == 2)
+  {
+    nnfw::util::matrix::Shape ifm_shape_matrix;
+    ifm_shape_matrix = _ctx.at(input_index).shape().asMatrix();
+    H = ifm_shape_matrix.H;
+    W = ifm_shape_matrix.W;
+    N = num_output;
+    C = 1;
+    _builder.addShapeConstr(input_index, asTensorInfo(ifm_shape_matrix, _ctx.at(input_index).type(),
+                                                      _ctx.at(input_index).scale(),
+                                                      _ctx.at(input_index).zeroPoint()));
+  }
+  else
+  {
+    nnfw::util::feature::Shape ifm_shape_feature;
+    ifm_shape_feature = _ctx.at(input_index).shape().asFeature();
+    H = ifm_shape_feature.H;
+    W = ifm_shape_feature.W;
+    N = num_output;
+    C = ifm_shape_feature.C;
+    assert(C * H * W == input_size);
+    _builder.addShapeConstr(
+        input_index, asTensorInfo(ifm_shape_feature, _ctx.at(input_index).type(),
+                                  _ctx.at(input_index).scale(), _ctx.at(input_index).zeroPoint()));
+  }
+
+  const auto batches = no_of_input_elements / input_size;
   const auto bias_size = _ctx.at(bias_index).shape().asVector();
 
   // TODO Should move to the place where the operand is handled, if it is possible.
   // Set Shape Constraints
-  _builder.addShapeConstr(output_index, asTensorInfo(output_size, _ctx.at(output_index).type(),
-                                                     _ctx.at(output_index).scale(),
-                                                     _ctx.at(output_index).zeroPoint()));
-  _builder.addShapeConstr(input_index, asTensorInfo(ifm_shape, _ctx.at(input_index).type(),
-                                                    _ctx.at(input_index).scale(),
-                                                    _ctx.at(input_index).zeroPoint()));
+  _builder.addShapeConstr(
+      output_index, asTensorInfo(batches, num_output, _ctx.at(output_index).type(),
+                                 _ctx.at(output_index).scale(), _ctx.at(output_index).zeroPoint()));
   _builder.addShapeConstr(
       weight_index, asTensorInfo(num_output /*H*/, input_size /*W*/, _ctx.at(weight_index).type(),
                                  _ctx.at(weight_index).scale(), _ctx.at(weight_index).zeroPoint()));
@@ -1975,10 +2002,9 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
     {
       case ANEURALNETWORKS_TENSOR_FLOAT32:
       {
-        auto initializer = [num_output, ifm_shape, weight_base,
+        auto initializer = [num_output, N, C, H, W, weight_base,
                             weight_size](::arm_compute::ITensor &tensor) {
-          const ::nnfw::util::kernel::Shape ker_shape{num_output, ifm_shape.C, ifm_shape.H,
-                                                      ifm_shape.W};
+          const ::nnfw::util::kernel::Shape ker_shape{N, C, H, W};
           const ::internal::nnapi::kernel::Reader<float> from{ker_shape, weight_base, weight_size};
           ::nnfw::util::kernel::iterate(ker_shape)
               << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) {
@@ -1986,9 +2012,9 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
                    uint32_t offset = 0;
 
                    // ARM Compute Library uses 'NCHW' ordering
-                   offset += nth * ifm_shape.C * ifm_shape.H * ifm_shape.W;
-                   offset += ch * ifm_shape.H * ifm_shape.W;
-                   offset += row * ifm_shape.W;
+                   offset += nth * C * H * W;
+                   offset += ch * H * W;
+                   offset += row * W;
                    offset += col;
 
                    const ::arm_compute::Coordinates coordinate{offset};
@@ -2004,10 +2030,10 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
       }
       case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
       {
-        auto initializer = [num_output, ifm_shape, weight_base,
+        auto initializer = [num_output, N, C, H, W, weight_base,
                             weight_size](::arm_compute::ITensor &tensor) {
-          const ::nnfw::util::kernel::Shape ker_shape{num_output, ifm_shape.C, ifm_shape.H,
-                                                      ifm_shape.W};
+          const ::nnfw::util::kernel::Shape ker_shape{N, C, H, W};
+
           const ::internal::nnapi::kernel::Reader<uint8_t> from{ker_shape, weight_base,
                                                                 weight_size};
           ::nnfw::util::kernel::iterate(ker_shape)
@@ -2016,9 +2042,9 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
                    uint32_t offset = 0;
 
                    // ARM Compute Library uses 'NCHW' ordering
-                   offset += nth * ifm_shape.C * ifm_shape.H * ifm_shape.W;
-                   offset += ch * ifm_shape.H * ifm_shape.W;
-                   offset += row * ifm_shape.W;
+                   offset += nth * C * H * W;
+                   offset += ch * H * W;
+                   offset += row * W;
                    offset += col;
 
                    const ::arm_compute::Coordinates coordinate{offset};
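As a sanity check on the NCHW linearization used by both initializers above, a small self-contained example (the dims are made up for illustration):

    #include <cassert>
    #include <cstdint>

    int main()
    {
      // Assumed kernel shape {N=2, C=3, H=4, W=5}, element (nth=1, ch=2, row=3, col=4).
      const uint32_t N = 2, C = 3, H = 4, W = 5;
      const uint32_t nth = 1, ch = 2, row = 3, col = 4;

      uint32_t offset = 0;
      offset += nth * C * H * W; // skip 1 whole kernel   -> 60
      offset += ch * H * W;      // skip 2 whole channels -> +40
      offset += row * W;         // skip 3 whole rows     -> +15
      offset += col;             // final column          -> +4
      assert(offset == 119);
      assert(offset < N * C * H * W); // stays inside the flat buffer
      return 0;
    }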
runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h
index 486c0af..e9f693b 100644
   return ::arm_compute::TensorShape(shape.W, shape.H, shape.C, shape.N);
 }
 
+::arm_compute::TensorShape asTensorShape(const nnfw::util::matrix::Shape &shape)
+{
+  return ::arm_compute::TensorShape(shape.W, shape.H);
+}
+
 ::arm_compute::TensorShape asTensorShape(const nnfw::util::tensor::Shape &shape)
 {
   if (shape.rank() == 0)
@@ -203,6 +208,13 @@ inline ::arm_compute::TensorShape asTensorShape(const internal::tflite::operand:
                                    asQuantizationInfo(scale, zeroPoint));
 }
 
+::arm_compute::TensorInfo asTensorInfo(const nnfw::util::matrix::Shape &shape, const int32_t type,
+                                       const float scale = 0.0f, const int32_t zeroPoint = 0)
+{
+  return ::arm_compute::TensorInfo(asTensorShape(shape), 1, asDataType(type),
+                                   asQuantizationInfo(scale, zeroPoint));
+}
+
 ::arm_compute::TensorInfo asTensorInfoForBroadcast(const nnfw::util::tensor::Shape &shape,
                                                    const int32_t type, const size_t baseRank,
                                                    const float scale = 0.0f,
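For reference, a hypothetical call site for the two matrix-shape overloads added above, mirroring the rank-2 branch in Planner::visit (the literal dims and the direct member assignment are illustrative assumptions, not code from this patch):

    void example()
    {
      // H carries the batch count and W the per-batch element count,
      // matching how asMatrix() is consumed in compilation.cc above.
      nnfw::util::matrix::Shape shape; // default-constructed, as in the diff
      shape.H = 3;                     // batches (illustrative)
      shape.W = 8;                     // input_size (illustrative)

      // Note the argument order: ARM Compute takes width first, then height.
      const ::arm_compute::TensorShape ts = asTensorShape(shape); // (W=8, H=3)

      // scale and zeroPoint fall back to their 0.0f / 0 defaults here.
      const ::arm_compute::TensorInfo info =
          asTensorInfo(shape, ANEURALNETWORKS_TENSOR_FLOAT32 /*type*/);
      (void)ts;
      (void)info;
    }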