This commit adds the `ReduceSumEx` op to neurun and enables its generated testcase.
Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
#include <arm_compute/runtime/misc/functions/GenericFullyConnectedLayer.h>
#include <arm_compute/runtime/CL/functions/CLStridedSlice.h>
#include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h>
+#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
#include <arm_compute/runtime/CL/functions/CLCast.h>
#include <arm_compute/runtime/CL/functions/CLArithmeticDivision.h>
#include <arm_compute/runtime/CL/functions/CLPermuteEx.h>
});
}
+// Generates the ACL-CL stage for a ReduceSum node: decodes the constant axis
+// tensor, normalizes negative axes, converts them to ARM Compute's axis
+// numbering, and defers a CLReduceOperation(SUM) onto the execution builder.
+void StageGenerator::visit(const model::operation::ReduceSumNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(model::operation::ReduceSumNode::Input::INPUT)};
+ const auto axis_index{node.param().axis_index};
+
+ std::set<uint32_t> axes;
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().element_nums();
+ const auto input_rank = _ctx.at(input_index).shape().rank();
+
+ // The axis's data must exist as constant values
+ assert(axis_base != nullptr);
+ for (size_t n = 0; n < axis_size; ++n)
+ {
+ // Axis entries are int32; a negative value counts back from the last dimension.
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += input_rank;
+ }
+ // ToARMComputeAxis maps the frontend axis to ACL's reversed axis ordering.
+ axes.insert(ToARMComputeAxis(input_rank, axis_value).value());
+ }
+
+ // Plain-value snapshot of everything the deferred stage lambda needs;
+ // captured by value so it stays valid after this visit() returns.
+ struct Param
+ {
+ model::operand::Index output_index;
+ model::operand::Index input_index;
+
+ std::set<uint32_t> axes;
+ };
+
+ Param param;
+
+ param.output_index = output_index;
+ param.input_index = input_index;
+
+ param.axes = std::move(axes);
+
+ auto tensors = _tensor_builder;
+
+ // Executed later, once tensors have been allocated by the tensor builder.
+ returnStage([tensors, param](compiler::IExecutionBuilder &builder) {
+ auto output_alloc = tensors->at(param.output_index).get();
+ auto input_alloc = tensors->at(param.input_index).get();
+
+ auto fn = make_layer<::arm_compute::CLReduceOperation>();
+
+ fn->configure(input_alloc->handle(), output_alloc->handle(), param.axes,
+ ::arm_compute::ReduceOperation::SUM);
+
+ auto acl_fn = make_cl_function(std::move(fn));
+
+ builder.append(std::move(acl_fn));
+ });
+}
+
void StageGenerator::visit(const model::operation::ReshapeNode &node)
{
const auto output_index{node.getOutputs().at(0)};
virtual void visit(const model::operation::ConcatNode &) override;
virtual void visit(const model::operation::FullyConnectedNode &) override;
virtual void visit(const model::operation::MulNode &) override;
+ virtual void visit(const model::operation::ReduceSumNode &) override;
virtual void visit(const model::operation::ReshapeNode &) override;
virtual void visit(const model::operation::TanhNode &) override;
virtual void visit(const model::operation::SoftmaxNode &) override;
assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
}
+// Validates rank/shape constraints for ReduceSum before stage generation.
+// Requires a constant axis tensor of rank 0 or 1 and input rank <= 4; for
+// rank-4 inputs whose rank is reduced, only the HW- and C-reduction layouts
+// described below are supported.
+void OperationValidator::visit(const model::operation::ReduceSumNode &node)
+{
+ // Fix copy-paste from the Permute validator: tag this log line as ReduceSum.
+ VERBOSE(ReduceSum) << "Configure ReduceSum operation" << std::endl;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(model::operation::ReduceSumNode::Input::INPUT)};
+ const auto axis_index{node.param().axis_index};
+
+ UNUSED_RELEASE(output_index);
+ UNUSED_RELEASE(input_index);
+ UNUSED_RELEASE(axis_index);
+
+ const auto input_shape = _ctx.at(input_index).shape();
+ const auto output_shape = _ctx.at(output_index).shape();
+ const auto axis_shape = _ctx.at(axis_index).shape();
+
+ UNUSED_RELEASE(output_shape);
+ UNUSED_RELEASE(input_shape);
+ UNUSED_RELEASE(axis_shape);
+
+ assert(input_shape.rank() <= 4);
+ assert(output_shape.rank() <= input_shape.rank());
+ // The axis tensor must be a compile-time constant (scalar or 1-D list).
+ assert(_ctx.at(axis_index).hasData());
+ assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+ // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+ // supports cases reducing height and width or reducing depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the
+ // input. But the positions of the same dimensions in the input and output may be set differently.
+ // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+ // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+ // extend it in 4 dimensions, it should be {1,1,3,5}.
+ // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+ // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+ // next operation is not desired.
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
+ {
+ if (output_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(3) == output_shape.dim(1));
+ }
+ else if (output_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
+ assert((input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) ||
+ input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ }
+ }
+}
+
void OperationValidator::visit(const model::operation::TransposeNode &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
virtual void visit(const model::operation::CastNode &node) override;
virtual void visit(const model::operation::SoftmaxNode &node) override;
virtual void visit(const model::operation::PermuteNode &node) override;
+ virtual void visit(const model::operation::ReduceSumNode &node) override;
virtual void visit(const model::operation::TransposeNode &node) override;
private:
return new operation::AddNode{inputs, outputs, param};
};
+ // Factory for REDUCE_SUM_EX: the axis tensor (NNAPI input 1) becomes a node
+ // Param rather than an operand input, so the node has exactly one input.
+ _map[ANEURALNETWORKS_REDUCE_SUM_EX] = [](const OperationFactory::Param &init_param) {
+ assert(init_param.input_count == 2);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Reduced Axes Tensor Index
+
+ operand::IndexSet inputs{init_param.inputs[0]};
+ operand::IndexSet outputs{init_param.outputs[0]};
+
+ operation::ReduceSumNode::Param param;
+
+ // The axis operand is carried as a parameter, not as a node input.
+ param.axis_index = operand::Index{init_param.inputs[1]};
+
+ return new operation::ReduceSumNode{inputs, outputs, param};
+ };
+
_map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param) {
assert(init_param.input_count == 3);
assert(init_param.output_count == 1);
#include "SoftmaxNode.h"
#include "TransposeNode.h"
#include "PermuteNode.h"
+#include "ReduceSumNode.h"
#include "AddNode.h"
#include "SubNode.h"
#include "DepthwiseConv2DNode.h"
OP(MaxPool2DNode , true , MAX_POOL_2D)
OP(ConcatNode , true , CONCATENATION)
OP(FullyConnectedNode , true , FULLY_CONNECTED)
+OP(ReduceSumNode , true , REDUCE_SUM_EX)
OP(ReshapeNode , true , RESHAPE)
OP(MulNode , true , MUL)
OP(SoftmaxNode , true , SOFTMAX)
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReduceSumNode.h"
+
+#include <cassert>
+
+#include "NodeVisitor.h"
+
+namespace neurun
+{
+namespace model
+{
+namespace operation
+{
+
+// Double-dispatch hook: forwards this node to the visitor's ReduceSum overload.
+void ReduceSumNode::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+// Constrained to exactly one operand input (the tensor to reduce);
+// the axis tensor index travels in Param instead.
+ReduceSumNode::ReduceSumNode(const operand::IndexSet &inputs, const operand::IndexSet &outputs,
+ const Param &param)
+ : model::operation::Node{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace model
+} // namespace neurun
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_MODEL_OPERATION_REDUCE_SUM_NODE_H__
+#define __NEURUN_MODEL_OPERATION_REDUCE_SUM_NODE_H__
+
+#include "model/operation/Node.h"
+
+namespace neurun
+{
+namespace model
+{
+namespace operation
+{
+
+/**
+ * @brief Model node for the ReduceSum operation (frontend op REDUCE_SUM_EX).
+ *
+ * Takes a single operand input (the tensor to reduce); the index of the
+ * constant axis tensor is carried in Param rather than as an input.
+ */
+class ReduceSumNode : public model::operation::Node
+{
+public:
+ enum Input
+ {
+ INPUT = 0 // Tensor to be reduced
+ };
+
+ struct Param
+ {
+ operand::Index axis_index; // Index of the constant tensor listing the axes to reduce
+ };
+
+public:
+ ReduceSumNode(const operand::IndexSet &inputs, const operand::IndexSet &outputs,
+ const Param &param);
+
+public:
+ virtual void accept(NodeVisitor &&) const override;
+ virtual std::string getName() const override { return "ReduceSum"; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace model
+} // namespace neurun
+
+#endif // __NEURUN_MODEL_OPERATION_REDUCE_SUM_NODE_H__
GeneratedTests.squeeze*
GeneratedTests.gather_ex*
GeneratedTests.tensorflowmax_ex*
-GeneratedTests.reduce_sum_ex*
GeneratedTests.topk_v2*
# Unexpected result
GeneratedTests.split*