From 623dc9580587572577f4d1119213666a03496bcd Mon Sep 17 00:00:00 2001
From: =?utf8?q?Prasanna=20R/SNAP=20/SRI-Bangalore/Engineer/=EC=82=BC?=
 =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?=
Date: Tue, 4 Dec 2018 09:02:17 +0530
Subject: [PATCH] Add CL kernel calls for ReduceMin from PACL. (#3772)

This patch adds CL kernel calls for ReduceMin from PACL.

Signed-off-by: prasannar
---
 runtimes/pure_arm_compute/src/compilation.cc | 122 ++++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 44a4aa0..8b0457d 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -2559,7 +2559,127 @@ void Planner::visit(const ::internal::tflite::op::ReduceMin::Node &node)
 {
   VERBOSE(ReduceMin) << "Configure REDUCEMIN operation" << std::endl;
 
-  throw std::runtime_error("Not supported, yet");
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+  const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+  const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+  auto ifm_shape = _ctx.at(ifm_index).shape();
+  auto ofm_shape = _ctx.at(ofm_index).shape();
+  auto axis_shape = _ctx.at(axis_index).shape();
+  assert(ifm_shape.rank() <= 4);
+  assert(ofm_shape.rank() <= ifm_shape.rank());
+  assert(_ctx.at(axis_index).hasData());
+  assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+  // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+  // supports cases reducing height and width or reducing depth.
+  // TODO We have to support all cases of dimensions up to 4.
+  // For correct permuting, we have to set output's shape to be equal in dimension position of the
+  // input. But the positions of the same dimensions in the input and output may be set differently.
+  // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+  // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+  // extend it in 4 dimensions, it should be {1,1,3,5}.
+  // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+  // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+  // next operation is not desired.
+  if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+  {
+    if (ofm_shape.rank() == 2)
+    {
+      // Reducing HW
+      assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+    }
+    else if (ofm_shape.rank() == 3)
+    {
+      // Reducing C or
+      // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+      assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+                 ifm_shape.dim(2) == ofm_shape.dim(2) ||
+             (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+              (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+              ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+    }
+  }
+
+  _builder.addShapeConstr(
+      ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+                              _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+  _builder.addShapeConstr(
+      ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+                              _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+  std::set<uint32_t> axis;
+  {
+    const auto ifm_rank = ifm_shape.rank();
+    switch (axis_shape.rank())
+    {
+      case 0: // scalar
+      {
+        int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+        if (axis_value < 0)
+        {
+          axis_value += ifm_rank;
+        }
+        axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+        break;
+      }
+      case 1: // vector
+      {
+        const auto axis_base = _ctx.at(axis_index).data().base();
+        const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+        // If axis's data does not exist as constant values and can be gotten as input data, we have
+        // to find a way to infer output shape when sinking output.
+        assert(axis_base != nullptr);
+        for (uint32_t n = 0; n < axis_size; ++n)
+        {
+          int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+          if (axis_value < 0)
+          {
+            axis_value += ifm_rank;
+          }
+          axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+        }
+        break;
+      }
+      default:
+        throw std::runtime_error("Not supported");
+        break;
+    }
+  }
+
+  // Construct operation parameters
+  struct Param
+  {
+    int ofm_index;
+    int ifm_index;
+    std::set<uint32_t> axis;
+  };
+
+  Param param;
+
+  param.ofm_index = ofm_index.asInt();
+  param.ifm_index = ifm_index.asInt();
+  param.axis = axis;
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+    auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+    if (::internal::arm_compute::isGpuMode())
+    {
+      auto fn = nnfw::make_unique<::arm_compute::CLReduceOperation>();
+
+      fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+                    ::arm_compute::ReduceOperation::MIN);
+
+      builder.append("ReduceMin", std::move(fn));
+    }
+    else
+      throw std::runtime_error("Not supported, yet");
+  };
+
+  _builder.addStage(stage);
 }
 
 void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node)
-- 
2.7.4