dnn: fix gather layer implementation
author Alexander Alekhin <a.alekhin@yadro.com>
Tue, 20 Dec 2022 06:09:34 +0000 (06:09 +0000)
committer Alexander Alekhin <a.alekhin@yadro.com>
Tue, 20 Dec 2022 06:09:34 +0000 (06:09 +0000)
- support FP16 data
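
The FP16 support comes down to converting half-precision index data to CV_32S
before gathering. A minimal standalone sketch of that conversion, assuming
OpenCV 4.x with cv::convertFp16 available (values and shapes are made up for
illustration, not taken from the patch):

    #include <opencv2/core.hpp>

    int main()
    {
        // Hypothetical FP16-encoded indices: start from FP32 and round-trip.
        cv::Mat indicesF32 = (cv::Mat_<float>(1, 3) << 0.f, 2.f, -1.f);
        cv::Mat indicesF16;
        cv::convertFp16(indicesF32, indicesF16);   // FP16 stored as CV_16S

        // Same two-step conversion the updated forward() performs:
        // FP16 -> FP32, then FP32 -> CV_32S.
        cv::Mat back32F, indices32S;
        cv::convertFp16(indicesF16, back32F);
        back32F.convertTo(indices32S, CV_32S);

        CV_Assert(indices32S.type() == CV_32SC1);
        // Negative values (e.g. -1) are normalized against the axis size
        // later by the layer via normalize_axis().
        return 0;
    }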

modules/dnn/src/layers/gather_layer.cpp
modules/dnn/test/test_onnx_importer.cpp

index 8d93a85dc449b5a1356697a6c126296b9e0a87cf..924b5fcbc191792db59e8bda3bc2c222b563c1c7 100644
@@ -45,34 +45,70 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        // FP16 fallback is not needed as we handle FP16 below
+
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
+        CV_CheckEQ(inputs.size(), (size_t)2, "");
+        CV_CheckEQ(outputs.size(), (size_t)1, "");
+
         const Mat& inp = inputs[0];
-        const Mat& indices = inputs[1];
+
+        int indicesType = inputs[1].type();
+        CV_CheckType(indicesType, indicesType == CV_32FC1 || indicesType == CV_16SC1, "");
+        Mat indices32S;
+        if (indicesType == CV_16S/*FP16*/)
+        {
+            Mat indicesF32;
+            convertFp16(inputs[1], indicesF32);
+            indicesF32.convertTo(indices32S, CV_32S);
+        }
+        else
+        {
+            inputs[1].convertTo(indices32S, CV_32S);
+        }
+        const size_t indices_total = indices32S.total();
+        indices32S = indices32S.reshape(1, indices_total);
+
         Mat& out = outputs[0];
 
+        CV_CheckTypeEQ(inp.type(), out.type(), "");
+        CV_CheckTypeEQ(indices32S.type(), CV_32SC1, "");
+
         const int axis = normalize_axis(m_axis, shape(inp));
 
+        // FIXIT: why should we work with non-normalized input? It should be handled in the importer or in the layer's output generator
+        const int axis_size = (int)inp.size[axis];
+        for (size_t j = 0 ; j < indices_total; ++j)
+        {
+            int& idx = indices32S.at<int>(j);
+            idx = normalize_axis(idx, axis_size);  // validate and normalize indices
+        }
+
         const size_t outer_size = axis == 0 ? inp.total() : inp.step1(axis - 1);
         const size_t outer_dims = inp.total() / outer_size;
         const size_t inner_size = inp.step1(axis);
 
-        const float* idx = indices.ptr<const float>(); // TODO: change type to integer in the future.
+        const int* idx = indices32S.ptr<int>();
         const char* src = inp.ptr<const char>();
         char* dst = out.ptr<char>();
+        CV_CheckEQ(out.total(), outer_dims * indices_total * inner_size, "");
 
         const size_t es = inp.elemSize1();
+        // TODO: optimize with a switch over (inner_size * es)
+        const size_t inner_bytes = inner_size * es;
         for (size_t i = 0; i < outer_dims; ++i)
         {
             const size_t src_offset = i * outer_size;
-            for (size_t j = 0 ; j < indices.total(); ++j)
+            for (size_t j = 0 ; j < indices_total; ++j)
             {
-                const size_t index = (static_cast<int>(idx[j]) + inp.size[axis]) % inp.size[axis];
-                const size_t new_offset = src_offset + index * inp.step1(axis);
-                std::memcpy(dst, src + new_offset * es, inner_size * es);
-                dst += inner_size * es;
+                const int index = idx[j];
+                CV_DbgCheck(index, index >= 0 && index < axis_size, "");
+                const size_t new_offset = src_offset + index * inner_size;
+                std::memcpy(dst, src + new_offset * es, inner_bytes);
+                dst += inner_bytes;
             }
         }
     }
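
For reference, the indexing arithmetic of the copy loop above can be checked
against a plain-array version. The following standalone sketch is a made-up
illustration (shape {2, 3, 4}, gather along axis 1, indices already
normalized), not code from the patch:

    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main()
    {
        const int shape[3] = {2, 3, 4};           // input shape, gather along axis 1
        const std::vector<int> idx = {2, 0};      // normalized indices

        std::vector<float> inp(2 * 3 * 4);
        for (size_t i = 0; i < inp.size(); ++i) inp[i] = (float)i;

        const size_t inner_size = shape[2];                  // step1(axis)     = 4
        const size_t outer_size = shape[1] * shape[2];       // step1(axis - 1) = 12
        const size_t outer_dims = inp.size() / outer_size;   // = 2

        std::vector<float> out(outer_dims * idx.size() * inner_size);
        float* dst = out.data();
        for (size_t i = 0; i < outer_dims; ++i)
        {
            const size_t src_offset = i * outer_size;
            for (size_t j = 0; j < idx.size(); ++j)
            {
                // copy one contiguous inner block per selected index
                std::memcpy(dst, inp.data() + src_offset + idx[j] * inner_size,
                            inner_size * sizeof(float));
                dst += inner_size;
            }
        }
        // out has shape {2, 2, 4}: slices 2 and 0 selected along axis 1
        std::printf("out[0] = %.0f, out[4] = %.0f\n", out[0], out[4]);  // 8 and 0
        return 0;
    }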
index e8350e418d587109a64c1cbdd93e13c73456e16b..58d2086b4c148d82e0f6132152a44c92e5d20bd3 100644
@@ -199,9 +199,11 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
 
 TEST_P(Test_ONNX_layers, Gather)
 {
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     testONNXModels("gather", npy, 0, 0, false, false);
+}
+
+TEST_P(Test_ONNX_layers, Gather_Scalar)
+{
     testONNXModels("gather_scalar", npy, 0, 0, false, false);
 }