From: Alexander Alekhin
Date: Tue, 20 Dec 2022 06:09:34 +0000 (+0000)
Subject: dnn: fix gather layer implementation
X-Git-Tag: accepted/tizen/unified/20230127.161057~1^2~24^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1102b7eff88728577f1bbec1126108f0795e3534;p=platform%2Fupstream%2Fopencv.git

dnn: fix gather layer implementation

- support FP16 data
---

diff --git a/modules/dnn/src/layers/gather_layer.cpp b/modules/dnn/src/layers/gather_layer.cpp
index 8d93a85dc4..924b5fcbc1 100644
--- a/modules/dnn/src/layers/gather_layer.cpp
+++ b/modules/dnn/src/layers/gather_layer.cpp
@@ -45,34 +45,70 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        // FP16 fallback is not needed as we handle FP16 below
+
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
+        CV_CheckEQ(inputs.size(), (size_t)2, "");
+        CV_CheckEQ(outputs.size(), (size_t)1, "");
+
         const Mat& inp = inputs[0];
-        const Mat& indices = inputs[1];
+
+        int indicesType = inputs[1].type();
+        CV_CheckType(indicesType, indicesType == CV_32FC1 || indicesType == CV_16SC1, "");
+        Mat indices32S;
+        if (indicesType == CV_16S/*FP16*/)
+        {
+            Mat indicesF32;
+            convertFp16(inputs[1], indicesF32);
+            indicesF32.convertTo(indices32S, CV_32S);
+        }
+        else
+        {
+            inputs[1].convertTo(indices32S, CV_32S);
+        }
+        const size_t indices_total = indices32S.total();
+        indices32S = indices32S.reshape(1, indices_total);
+
         Mat& out = outputs[0];
+        CV_CheckTypeEQ(inp.type(), out.type(), "");
+        CV_CheckTypeEQ(indices32S.type(), CV_32SC1, "");
 
         const int axis = normalize_axis(m_axis, shape(inp));
 
+        // FIXIT: why should we work with non-normalized input? it should be handled in importer or layer's output generator
+        const int axis_size = (int)inp.size[axis];
+        for (size_t j = 0 ; j < indices_total; ++j)
+        {
+            int& idx = indices32S.at<int>(j);
+            idx = normalize_axis(idx, axis_size);  // validate and normalize indices
+        }
+
         const size_t outer_size = axis == 0 ? inp.total() : inp.step1(axis - 1);
         const size_t outer_dims = inp.total() / outer_size;
         const size_t inner_size = inp.step1(axis);
 
-        const float* idx = indices.ptr<float>();  // TODO: change type to integer in the future.
+        const int* idx = indices32S.ptr<int>();
         const char* src = inp.ptr<char>();
         char* dst = out.ptr<char>();
 
+        CV_CheckEQ(out.total(), outer_dims * indices_total * inner_size, "");
+
         const size_t es = inp.elemSize1();
+        // TODO: optimize through switch (inner_size * es)
+        const size_t inner_bytes = inner_size * es;
         for (size_t i = 0; i < outer_dims; ++i)
         {
             const size_t src_offset = i * outer_size;
-            for (size_t j = 0 ; j < indices.total(); ++j)
+            for (size_t j = 0 ; j < indices_total; ++j)
             {
-                const size_t index = (static_cast<int>(idx[j]) + inp.size[axis]) % inp.size[axis];
-                const size_t new_offset = src_offset + index * inp.step1(axis);
-                std::memcpy(dst, src + new_offset * es, inner_size * es);
-                dst += inner_size * es;
+                const int index = idx[j];
+                CV_DbgCheck(index, index >= 0 && index < axis_size, "");
+                const size_t new_offset = src_offset + index * inner_size;
+                std::memcpy(dst, src + new_offset * es, inner_bytes);
+                dst += inner_bytes;
             }
         }
     }
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index e8350e418d..58d2086b4c 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -199,9 +199,11 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
 
 TEST_P(Test_ONNX_layers, Gather)
 {
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     testONNXModels("gather", npy, 0, 0, false, false);
+}
+
+TEST_P(Test_ONNX_layers, Gather_Scalar)
+{
     testONNXModels("gather_scalar", npy, 0, 0, false, false);
 }
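
Below is a minimal standalone sketch (not part of the patch) of the index-handling
path the patch introduces: FP16 indices are unpacked to FP32, converted to CV_32S
once up front, and negative indices are normalized before the copy loop instead of
per-element float casts inside it. cv::convertFp16, Mat::convertTo, Mat::at and
CV_Assert are existing OpenCV core APIs; the axis_size value and the inline
normalization (mirroring what normalize_axis() does in the layer) are illustrative
assumptions.

    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        // Gather indices may arrive as CV_32F, or as FP16 packed into a CV_16S Mat.
        cv::Mat indicesF32 = (cv::Mat_<float>(1, 3) << 0.f, -1.f, 2.f);
        cv::Mat indicesF16;
        cv::convertFp16(indicesF32, indicesF16);   // FP32 -> FP16 (stored as CV_16S)

        // As in the patched layer: unpack FP16 to FP32, then convert to CV_32S once.
        cv::Mat unpacked, indices32S;
        cv::convertFp16(indicesF16, unpacked);     // FP16 -> FP32
        unpacked.convertTo(indices32S, CV_32S);

        // Normalize negative indices against the axis size up front; axis_size = 4
        // is an illustrative value (the layer derives it from inp.size[axis]).
        const int axis_size = 4;
        for (int j = 0; j < (int)indices32S.total(); ++j)
        {
            int& idx = indices32S.at<int>(j);
            if (idx < 0)
                idx += axis_size;                  // same effect as normalize_axis()
            CV_Assert(idx >= 0 && idx < axis_size);
        }
        std::cout << indices32S << std::endl;      // prints [0, 3, 2]
        return 0;
    }

With the indices validated and widened once, the inner gather loop degenerates to a
plain memcpy of inner_bytes per index, which is what the rewritten loop in the patch
does.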