From: Andrei Shedko/AI Tools Lab/SRR/Assistant Engineer/Samsung Electronics
Date: Mon, 3 Dec 2018 17:38:21 +0000 (+0300)
Subject: [nnc] MeanSoftBackend (#2367)
X-Git-Tag: nncc_backup~1215
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ed9db59a63c00cd623642c6c4e4fafee5a57de05;p=platform%2Fcore%2Fml%2Fnnfw.git

[nnc] MeanSoftBackend (#2367)

This adds support for reduction with Mean to the C++ soft backend.
The implementation is the reference one from TFLite.

Signed-off-by: Andrei Shedko
---

diff --git a/contrib/nnc/core/modelIR/Index.cpp b/contrib/nnc/core/modelIR/Index.cpp
index 36e0921..ebeb182 100644
--- a/contrib/nnc/core/modelIR/Index.cpp
+++ b/contrib/nnc/core/modelIR/Index.cpp
@@ -37,7 +37,7 @@ Index& Index::fill(int32_t index) {
   return (*this);
 }
 
-int32_t &Index::at(int32_t axis) { return _indices[(axis < 0) ? (_indices.size() + axis) : axis]; }
+int32_t& Index::at(int32_t axis) { return _indices[(axis < 0) ? (_indices.size() + axis) : axis]; }
 
 int32_t Index::at(int32_t axis) const { return _indices[(axis < 0) ? (_indices.size() + axis) : axis]; }
 
diff --git a/contrib/nnc/passes/soft_backend/CPPGenerator.cpp b/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
index 2ed4343..1e2928c 100644
--- a/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
+++ b/contrib/nnc/passes/soft_backend/CPPGenerator.cpp
@@ -37,6 +37,7 @@ using namespace std;
 #include "cpp_fully_connected.generated.h"
 #include "cpp_pool.generated.h"
 #include "cpp_relu.generated.h"
+#include "cpp_reduce.generated.h"
 #include "cpp_softmax.generated.h"
 #include "cpp_scale.generated.h"
 #include "cpp_dropout.generated.h"
@@ -282,6 +283,7 @@ void CPPCodeGenerator::materializeCode(ostream &out, const ModelAnalyzer &ma, co
   out.write(cpp_fully_connected, sizeof(cpp_fully_connected));
   out.write(cpp_pool, sizeof(cpp_pool));
   out.write(cpp_relu, sizeof(cpp_relu));
+  out.write(cpp_reduce, sizeof(cpp_reduce));
   out.write(cpp_softmax, sizeof(cpp_softmax));
   out.write(cpp_elementwise, sizeof(cpp_elementwise));
   out.write(cpp_elu, sizeof(cpp_elu));
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
index ddb73b5..8cbbcb5 100644
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
@@ -290,8 +290,13 @@ void ModelAnalyzer::visit(mir::ops::PadOp& op) {
 }
 
 void ModelAnalyzer::visit(mir::ops::ReduceFOp& op) {
-  assert(false && "Not implemented yet");
-  addOpDescr(&op, "ReduceMean");
+  switch (op.getFuncType()) {
+    case mir::ops::ReduceFOp::FuncType::mean:
+      addOpDescr(&op, "reduceMean");
+      break;
+    default:
+      assert(false && "NOT IMPLEMENTED");
+  }
 }
 
 void ModelAnalyzer::visit(mir::ops::TransposeOp& op) {
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
index 24f95c3..d7ea8ed 100644
--- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_common_funcs.def
@@ -263,6 +263,56 @@ inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
          i3 * dims.strides[3];
 }
 
+// Gets the next index to iterate through a multidimensional array.
+inline bool NextIndex(const int num_dims, const int* dims, int* current) {
+  if (num_dims == 0) {
+    return false;
+  }
+  TFLITE_DCHECK(dims != nullptr);
+  TFLITE_DCHECK(current != nullptr);
+  int carry = 1;
+  for (int idx = num_dims - 1; idx >= 0; --idx) {
+    int current_val = current[idx] + carry;
+    TFLITE_DCHECK_GE(dims[idx], current_val);
+    if (dims[idx] == current_val) {
+      current[idx] = 0;
+    } else {
+      current[idx] = current_val;
+      carry = 0;
+      break;
+    }
+  }
+  return (carry == 0);
+}
+
+inline size_t ReducedOutputOffset(const int num_dims, const int* dims,
+                                  const int* index, const int num_axis,
+                                  const int* axis) {
+  if (num_dims == 0) {
+    return 0;
+  }
+  TFLITE_DCHECK(dims != nullptr);
+  TFLITE_DCHECK(index != nullptr);
+  size_t offset = 0;
+  for (int idx = 0; idx < num_dims; ++idx) {
+    // if we need to skip this axis
+    bool is_axis = false;
+    if (axis != nullptr) {
+      for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
+        if (idx == axis[axis_idx]) {
+          is_axis = true;
+          break;
+        }
+      }
+    }
+    if (!is_axis) {
+      offset = offset * static_cast<size_t>(dims[idx]) +
+               static_cast<size_t>(index[idx]);
+    }
+  }
+  return offset;
+}
+
 template <int N>
 bool IsPackedWithoutStrides(const Dims<N>& dims) {
   int expected_stride = 1;
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
index b04cc11..e0b1b6c 100644
--- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include
 #include
 #include
@@ -504,8 +503,8 @@ void ElementWise(Tensor &out,
   }
 }
 
-void reshape(Tensor &out, const char *params, const Tensor &in)
-{
+// TODO refactor tflite's code for this op
+void reshape(Tensor& out, const char* params, const Tensor& in) {
   Shape out_s = deserializeShape(params);
   assert(out_s.getNumElems() == in.getShape().getNumElems());
 
@@ -513,6 +512,47 @@ void reshape(Tensor& out, const char* params, const Tensor& in) {
   out.fillData(in.getData());
 }
 
+void reduceMean(Tensor& out, const char* params, const Tensor& in) {
+  Shape tmp_reduction_dims = deserializeShape(params);
+  bool keep_dims = deserializeT<bool>(params);
+  Shape out_s = deserializeShape(params);
+  out.reShape(out_s);
+
+  const int32_t rank_inp = in.getShape().getDims();
+  const int32_t rank_out = out_s.getDims();
+  const int32_t rank_axis = tmp_reduction_dims.getDims();
+
+
+  int32_t in_dim[8];
+  int32_t tmp_index[8]; // input iterator storage
+  assert(rank_inp < 8);
+  for (int i = 0; i < rank_inp; i++) {
+    in_dim[i] = in.getShape()[i];
+  }
+  int32_t out_dim[8];
+  assert(rank_out <= 8);
+  for (int i = 0; i < rank_out; i++) {
+    out_dim[i] = out.getShape()[i];
+  }
+  int32_t axis[8];
+  int32_t resolved_axis[8]; // in case there are negative or duplicate indexes
+  assert(rank_axis <= 8);
+  for (int i = 0; i < rank_axis; i++) {
+    axis[i] = tmp_reduction_dims[i];
+  }
+
+  float* temp_sum = new float[out_s.getNumElems()];
+
+  bool succ = Mean(
+    in.getData(), in_dim, rank_inp,
+    out.getData(), out_dim, rank_out,
+    axis, rank_axis, keep_dims,
+    tmp_index, resolved_axis, temp_sum
+  );
+  assert(succ && "Mean failed!");
+  delete[] temp_sum;
+}
+
 void pad(Tensor& out, const char* params, const Tensor& in) {
   const float* input = in.getData();
   const Dims<4> input_dims = shapeToDims(in.getShape());
diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_reduce.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_reduce.def
new file mode 100644
index 0000000..cbf2a48
--- /dev/null
+++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_reduce.def
@@ -0,0 +1,185 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+
+// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
+// This method iterates through input data and reduces elements along the
+// dimensions given in axis.
+template <typename In, typename Out>
+inline bool Reduce(const In* input_data, const int* input_dims,
+                   const int* output_dims, const int input_num_dims,
+                   const int output_num_dims, const int* axis,
+                   const int num_axis, int* input_iter,
+                   Out reducer(const Out current, const In in),
+                   Out* output_data) {
+  // Reset input iterator.
+  for (int idx = 0; idx < input_num_dims; ++idx) {
+    input_iter[idx] = 0;
+  }
+  // Iterate through input_data.
+  do {
+    size_t input_offset =
+        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
+    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
+                                               input_iter, num_axis, axis);
+    output_data[output_offset] =
+        reducer(output_data[output_offset], input_data[input_offset]);
+  } while (NextIndex(input_num_dims, input_dims, input_iter));
+  return true;
+}
+
+inline bool ResolveAxis(const int num_dims, const int* axis,
+                        const int64_t num_axis, int* out_axis,
+                        int* out_num_axis) {
+  *out_num_axis = 0;  // Just in case.
+  // Short-circuit axis resolution for scalars; the axis will go unused.
+  if (num_dims == 0) {
+    return true;
+  }
+  // O(n^2) is fine since out_num_axis should be really small, mostly <= 4.
+  for (int64_t idx = 0; idx < num_axis; ++idx) {
+    // Handle negative index.
+    int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
+    TFLITE_DCHECK(current >= 0 && current < num_dims);
+    bool is_dup = false;
+    for (int j = 0; j < *out_num_axis; ++j) {
+      if (out_axis[j] == current) {
+        is_dup = true;
+        break;
+      }
+    }
+    if (!is_dup) {
+      out_axis[*out_num_axis] = current;
+      *out_num_axis += 1;
+    }
+  }
+  return true;
+}
+
+// This method expects that output_data has been initialized.
+template <typename In, typename Out>
+inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
+                          const int* output_dims, const int input_num_dims,
+                          const int output_num_dims, const int* axis,
+                          const int num_axis, int* input_iter,
+                          Out* output_data) {
+  auto reducer = [](const Out current, const In in) -> Out {
+    const Out actual_in = static_cast<Out>(in);
+    return current + actual_in;
+  };
+  return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
+                         output_num_dims, axis, num_axis, input_iter, reducer,
+                         output_data);
+}
+
+template <typename T>
+inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
+                                    const T init_value, T* data) {
+  size_t num_elements = 1;
+  for (int idx = 0; idx < num_dims; ++idx) {
+    size_t current = static_cast<size_t>(dims[idx]);
+    // Overflow prevention.
+    if (num_elements > std::numeric_limits<size_t>::max() / current) {
+      return false;
+    }
+    num_elements *= current;
+  }
+  for (size_t idx = 0; idx < num_elements; ++idx) {
+    data[idx] = init_value;
+  }
+  return true;
+}
+
+// Computes the generic value (i.e., sum/max/min/prod) of elements across
+// dimensions given in axis. It needs to pass in init_value and reducer.
+template <typename T>
+inline bool ReduceGeneric(const T* input_data, const int* input_dims,
+                          const int input_num_dims, T* output_data,
+                          const int* output_dims, const int output_num_dims,
+                          const int* axis, const int64_t num_axis_dimensions,
+                          bool keep_dims, int* temp_index, int* resolved_axis,
+                          T init_value,
+                          T reducer(const T current, const T in)) {
+  // Reset output data.
+  if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
+                               output_data)) {
+    return false;
+  }
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+                   &num_resolved_axis)) {
+    return false;
+  }
+
+  return Reduce(input_data, input_dims, output_dims, input_num_dims,
+                output_num_dims, resolved_axis, num_resolved_axis,
+                temp_index, reducer, output_data);
+}
+
+// Computes the mean of elements across dimensions given in axis.
+// It does so in two stages: first it calculates the sum of elements along the
+// axis, then it divides the sum by the number of elements in the axis.
+template <typename T, typename U>
+inline bool Mean(const T* input_data, const int* input_dims,
+                 const int input_num_dims, T* output_data,
+                 const int* output_dims, const int output_num_dims,
+                 const int* axis, const int num_axis_dimensions, bool keep_dims,
+                 int* temp_index, int* resolved_axis, U* temp_sum) {
+  // Reset output data.
+  size_t num_outputs = 1;
+  for (int idx = 0; idx < output_num_dims; ++idx) {
+    size_t current = static_cast<size_t>(output_dims[idx]);
+    // Overflow prevention.
+    if (num_outputs > std::numeric_limits<size_t>::max() / current) {
+      return false;
+    }
+    num_outputs *= current;
+  }
+  for (size_t idx = 0; idx < num_outputs; ++idx) {
+    output_data[idx] = T();
+    temp_sum[idx] = U();
+  }
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+                   &num_resolved_axis)) {
+    return false;
+  }
+
+  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
+                           output_num_dims, resolved_axis, num_resolved_axis,
+                           temp_index, temp_sum)) {
+    return false;
+  }
+
+  // Calculate mean by dividing output_data by the number of aggregated elements.
+  U num_elements_in_axis = 1;
+  for (int idx = 0; idx < num_resolved_axis; ++idx) {
+    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
+    // Overflow prevention.
+    if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
+      return false;
+    }
+    num_elements_in_axis *= current;
+  }
+
+  if (num_elements_in_axis > 0) {
+    for (size_t idx = 0; idx < num_outputs; ++idx) {
+      output_data[idx] =
+          static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
+    }
+  }
+  return true;
+}
\ No newline at end of file
diff --git a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
index 75a7891..c975e99 100644
--- a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
+++ b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -34,6 +34,7 @@
 #include "code_snippets/cpp_depthwise_conv.def"
 #include "code_snippets/cpp_fully_connected.def"
 #include "code_snippets/cpp_pool.def"
+#include "code_snippets/cpp_reduce.def"
 #include "code_snippets/cpp_relu.def"
 #include "code_snippets/cpp_softmax.def"
 #include "code_snippets/cpp_elu.def"
@@ -59,6 +60,7 @@
 #include "core/modelIR/operations/DepthwiseConv2DOp.h"
 #include "core/modelIR/operations/PoolOp.h"
 #include "core/modelIR/operations/ReluOp.h"
+#include "core/modelIR/operations/ReduceFOp.h"
 #include "core/modelIR/operations/CappedReluOp.h"
 #include "core/modelIR/operations/ReshapeOp.h"
 #include "core/modelIR/operations/ConcatOp.h"
@@ -476,7 +478,6 @@ TEST(cpp_operations_test, convTransposed2d)
   }
 }
 
-
 TEST(cpp_operations_test, conv2d)
 {
   // Iterate over kernel width, kernel height,
@@ -647,6 +648,38 @@ TEST(cpp_operations_test, tanh) {
   createAndRunTestGraph(op_generator, tanhActivation, input_n_tensors, a_input_tensor);
 }
 
+TEST(cpp_operations_test, reduceMeanTst) {
+  // test prerequisites
+  // different test cases
+  std::vector<int32_t> test_axis_list[] = {
+    {2, 3},
+    {1},
+    {0},
+    {2},
+    {3},
+    {0, 2},
+    {1, 2, 3}
+  };
+  for (const vector<int32_t>& axis_list: test_axis_list) {
+    for (const bool keep_dims: {true, false}) {
+      vector<int> input_shape_data{2, 3, 4, 5};
+      Tensor a_input_tensor;
+      vector<unique_ptr<mir::TensorVariant>> input_n_tensors(1);
+      fillTensors(input_n_tensors[0], a_input_tensor, input_shape_data, 1.0f);
+      auto op_generator = [axis_list, keep_dims](mir::Graph& g,
+                                                 const std::vector<mir::IODescriptor>& inputs) {
+        auto op = g.create<mir::ops::ReduceFOp>(
+            "y", inputs[0], axis_list, keep_dims,
+            mir::ops::ReduceFOp::FuncType::mean);
+        return op;
+      };
+
+      createAndRunTestGraph(op_generator, reduceMean, input_n_tensors, a_input_tensor);
+    }
+  }
+}
+
+
 TEST(cpp_operations_test, softmax) {
   // iterate over number of dimensions in tensor
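
---

Note for reviewers unfamiliar with the TFLite reduction scheme used above: the following is a minimal standalone sketch (not part of the patch) of how NextIndex and ReducedOutputOffset cooperate to compute a mean. Every input index is visited exactly once, and its output offset is the flattened index with the reduced axes skipped. The main() driver and the tiny 2x3 example data are illustrative assumptions only; the DCHECKs of the real code are omitted for brevity.

#include <cstddef>
#include <iostream>

// Advance a multidimensional index in row-major order; false when done.
static bool NextIndex(const int num_dims, const int* dims, int* current) {
  if (num_dims == 0) return false;
  int carry = 1;
  for (int idx = num_dims - 1; idx >= 0; --idx) {
    const int val = current[idx] + carry;
    if (dims[idx] == val) {
      current[idx] = 0;  // overflow on this axis, carry into the next one
    } else {
      current[idx] = val;
      carry = 0;
      break;
    }
  }
  return carry == 0;
}

// Flatten `index`, skipping the axes listed in `axis`.
static size_t ReducedOutputOffset(const int num_dims, const int* dims,
                                  const int* index, const int num_axis,
                                  const int* axis) {
  size_t offset = 0;
  for (int idx = 0; idx < num_dims; ++idx) {
    bool is_axis = false;
    for (int a = 0; a < num_axis; ++a) {
      if (axis != nullptr && idx == axis[a]) {
        is_axis = true;
        break;
      }
    }
    if (!is_axis)  // reduced axes do not contribute to the output offset
      offset = offset * static_cast<size_t>(dims[idx]) +
               static_cast<size_t>(index[idx]);
  }
  return offset;
}

int main() {
  // Mean over axis 1 of the 2x3 tensor {{1,2,3},{4,5,6}} -> {2, 5}.
  const int dims[] = {2, 3};
  const int axis[] = {1};
  const float input[] = {1, 2, 3, 4, 5, 6};
  float sum[2] = {0.f, 0.f};
  int iter[2] = {0, 0};
  do {
    const size_t in_off = ReducedOutputOffset(2, dims, iter, 0, nullptr);
    const size_t out_off = ReducedOutputOffset(2, dims, iter, 1, axis);
    sum[out_off] += input[in_off];       // first stage: sum along the axis
  } while (NextIndex(2, dims, iter));
  for (float s : sum)
    std::cout << s / 3 << ' ';           // second stage: divide by count; prints "2 5"
  std::cout << '\n';
  return 0;
}

This mirrors the two-stage structure of Mean in cpp_reduce.def (ReduceSumImpl followed by division by num_elements_in_axis); keep_dims only changes the reported output shape, not the arithmetic.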