From af76fbca19f76449164406f7ff2e69dc8e6a5c88 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EC=9E=A5=EC=A7=80=EC=84=AD/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84?= =?utf8?q?=EC=9E=90?= Date: Mon, 26 Nov 2018 14:43:58 +0900 Subject: [PATCH] Apply CL Kernel of ReduceOperation to PACL. (#3673) This commit applies CL Kernel of ReduceOperation to PACL. Signed-off-by: jiseob.jang --- libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp | 9 +- runtimes/pure_arm_compute/src/compilation.cc | 113 +++++++++++----------- 2 files changed, 65 insertions(+), 57 deletions(-) diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp index 3c28739..786ed31 100644 --- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp @@ -244,8 +244,9 @@ const std::map CLKernelLibraryEx::_kernel_program_map {"pooling_layer_MxN_quantized_nhwc", "pooling_layer_quantized.cl"}, {"pooling_layer_MxN_quantized_nchw", "pooling_layer_quantized.cl"}, {"quantization_layer", "quantization_layer.cl"}, - {"reduce_max", "reduce_max.cl"}, - {"reduce_sum", "reduce_sum.cl"}, + {"reduce_max", "reduce_operation.cl"}, + {"reduce_mean", "reduce_operation.cl"}, + {"reduce_sum", "reduce_operation.cl"}, {"reduction_operation", "reduction_operation.cl"}, {"reduction_mean", "reduction_mean.cl"}, {"remap_nearest_neighbour", "remap.cl"}, @@ -392,6 +393,10 @@ const std::map CLKernelLibraryEx::_program_source_map #include "./cl_kernels/pixelwise_div_int.clembed" }, { + "reduce_operation.cl", +#include "./cl_kernels/reduce_operation.clembed" + }, + { "reduce_max.cl", #include "./cl_kernels/reduce_max.clembed" }, diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index c736a93..8f9cf58 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -44,15 +44,13 @@ #include #include #include -#include -#include #include #include #include #include #include -#include #include +#include #include #include #include @@ -2602,7 +2600,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); - std::vector axis; + std::set axis; { const auto ifm_rank = ifm_shape.rank(); switch (axis_shape.rank()) @@ -2614,7 +2612,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) { axis_value += ifm_rank; } - axis.push_back(ToARMComputeAxis(ifm_rank, axis_value).value()); + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); break; } case 1: // vector @@ -2632,7 +2630,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) { axis_value += ifm_rank; } - axis.push_back(ToARMComputeAxis(ifm_rank, axis_value).value()); + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); } break; } @@ -2640,9 +2638,6 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) throw std::runtime_error("Not supported"); break; } - std::sort(axis.begin(), axis.end()); - auto last = std::unique(axis.begin(), axis.end()); - axis.erase(last, axis.end()); } // Construct operation parameters @@ -2650,7 +2645,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) { int ofm_index; int ifm_index; - std::vector axis; + std::set axis; }; Param param; @@ -2665,9 +2660,10 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReduceMax>(); + auto fn = nnfw::make_unique<::arm_compute::CLReduceOperation>(); - fn->configure(CAST_CL(ifm_alloc), param.axis, CAST_CL(ofm_alloc)); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MAX); builder.append("ReduceMax", std::move(fn)); } @@ -3195,47 +3191,44 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) _ctx.at(axis_index).type(), _ctx.at(axis_index).scale(), _ctx.at(axis_index).zeroPoint())); - // TODO keep_dims==0 - assert(keep_dims != 0); - - // Set axis - // TODO Other axis (Axis for width and height are currently supported.) - // TODO Other ranks (Rank 4 is currently supported.) - assert(_ctx.at(ifm_index).shape().rank() == 4); - - std::vector axis; + std::set axis; { - const auto axis_base = _ctx.at(axis_index).data().base(); - const auto axis_type = _ctx.at(axis_index).type(); - const auto axis_size = _ctx.at(axis_index).shape().asVector(); - - // If axis's data does not exist as constant values and can be gotten as input data, we have - // to find a way to infer output shape when sinking output. - assert(axis_base != nullptr); - // NHWC type -> WHCN type - if (_ctx.at(ofm_index).shape().rank() == 4) + const auto ifm_rank = ifm_shape.rank(); + const auto axis_shape = _ctx.at(axis_index).shape(); + switch (axis_shape.rank()) { - for (uint32_t n = 0; n < axis_size; ++n) + case 0: // scalar { - const ::arm_compute::Coordinates coordinate{n}; - const int32_t *from = reinterpret_cast(axis_base) + n; - if (*from == 1) - { - axis.push_back(1); // h - } - else if (*from == 2) - { - axis.push_back(0); // w - } - else if (*from < 0) + int32_t axis_value = _ctx.at(axis_index).asScalar(); + if (axis_value < 0) { - // Nothing to do + axis_value += ifm_rank; } - else + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. + assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) { - throw std::runtime_error{"Not supported axis"}; + int32_t axis_value = *(reinterpret_cast(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); } + break; } + default: + throw std::runtime_error("Not supported"); + break; } } @@ -3243,7 +3236,7 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) { int ofm_index; int ifm_index; - std::vector axis; + std::set axis; }; Param param; @@ -3258,9 +3251,10 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReductionMean>(); + auto fn = nnfw::make_unique<::arm_compute::CLReduceOperation>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MEAN); builder.append("Mean", std::move(fn)); } @@ -5073,13 +5067,17 @@ void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node) _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); uint32_t input_rank = ifm_shape.rank(); - std::vector axis; + std::set axis; int32_t axis_rank = axis_shape.rank(); if (axis_rank == 0) { int32_t axis_value = _ctx.at(axis_index).asScalar(); - axis.push_back(ToARMComputeAxis(input_rank, axis_value).value()); + if (axis_value < 0) + { + axis_value += input_rank; + } + axis.insert(ToARMComputeAxis(input_rank, axis_value).value()); } else if (axis_rank == 1) { @@ -5091,8 +5089,12 @@ void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node) assert(axis_base != nullptr); for (uint32_t n = 0; n < axis_size; ++n) { - const int32_t *from = reinterpret_cast(axis_base) + n; - axis.push_back(ToARMComputeAxis(input_rank, *from).value()); + int32_t axis_value = *(reinterpret_cast(axis_base) + n); + if (axis_value < 0) + { + axis_value += input_rank; + } + axis.insert(ToARMComputeAxis(input_rank, axis_value).value()); } } else @@ -5104,7 +5106,7 @@ void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node) { int ofm_index; int ifm_index; - std::vector axis; + std::set axis; }; Param param; @@ -5119,9 +5121,10 @@ void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReduceSum>(); + auto fn = nnfw::make_unique<::arm_compute::CLReduceOperation>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::SUM); builder.append("ReduceSum", std::move(fn)); } -- 2.7.4