Merge remote-tracking branch 'upstream/3.4' into merge-3.4

author Alexander Alekhin <alexander.a.alekhin@gmail.com>

Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)

committer Alexander Alekhin <alexander.a.alekhin@gmail.com>

Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)
author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)
diff --cc modules/core/src/directx.cpp
Simple merge
diff --cc modules/core/src/va_intel.cpp

index c81e6f368740b3d500f330bb1e8e29482547fb20,30f89c41815db3326bb64aef6136e09da3a952f2..1d2b1cbf323b1e85e929130947768882a4d8a602
--- 1/modules/core/src/va_intel.cpp
--- 2/modules/core/src/va_intel.cpp
+++ b/modules/core/src/va_intel.cpp
@@@ -202,10 -171,10 +202,10 @@@ static bool ocl_convert_bgr_to_nv12(cl_
   
       k.args(clBuffer, step, cols, rows, clImageY, clImageUV);
   
-     size_t globalsize[] = { (size_t)cols, (size_t)rows };
+     size_t globalsize[] = { (size_t)cols/2, (size_t)rows/2 };
       return k.run(2, globalsize, 0, false);
   }
- -#endif // HAVE_VA_INTEL && HAVE_OPENCL
+ +#endif // HAVE_VA_INTEL
   
   } // namespace cv::va_intel::ocl
   
diff --cc modules/dnn/include/opencv2/dnn/all_layers.hpp
Simple merge
diff --cc modules/dnn/src/layers/slice_layer.cpp

index 790788b70e5ce1529f8db6ff55e98f453ecdb032,507964edf9809dccfc5db317758bab11eb9ce45a..ff997c3afc686cf63d9ac37bee051f4ee54046b5
--- 1/modules/dnn/src/layers/slice_layer.cpp
--- 2/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@@@ -130,10 -141,9 +147,13 @@@ public
   #endif
   #ifdef HAVE_DNN_NGRAPH
           if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-             return sliceRanges.size() == 1;
+             return sliceRanges.size() == 1 && !hasSteps;
++#endif
++#ifdef HAVE_CUDA
++        if (backendId == DNN_BACKEND_CUDA)
++            return !hasSteps;
   #endif
-         return backendId == DNN_BACKEND_OPENCV ||
-                backendId == DNN_BACKEND_CUDA;
+         return backendId == DNN_BACKEND_OPENCV;
       }
   
       bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@@ -579,30 -612,36 +624,60 @@@
       }
   #endif  // HAVE_DNN_NGRAPH
   
+ +
+ +#ifdef HAVE_CUDA
+ +    Ptr<BackendNode> initCUDA(
+ +        void *context_,
+ +        const std::vector<Ptr<BackendWrapper>>& inputs,
+ +        const std::vector<Ptr<BackendWrapper>>& outputs
+ +    ) override
+ +    {
+ +        auto context = reinterpret_cast<csl::CSLContext*>(context_);
+ +
+ +        std::vector<std::vector<std::size_t>> offsets;
+ +        for (const auto& ranges : finalSliceRanges)
+ +        {
+ +            std::vector<std::size_t> offsets_i;
+ +            for (const auto& range : ranges)
+ +                offsets_i.push_back(range.start);
+ +            offsets.push_back(std::move(offsets_i));
+ +        }
+ +
+ +        return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
+ +    }
+ +#endif
+ +
+ +
+ private:
+     void getSliceRecursive(const Mat &inpMat, std::vector<int> &inpIdx,
+                            const std::vector<Range> &sliceRanges,
+                            const std::vector<int> &sliceSteps, int dim, int dimsNum,
+                            Mat &outputs, std::vector<int> &outIdx)
+     {
+         int begin = sliceRanges[dim].start;
+         int end = sliceRanges[dim].end;
+         int step = !sliceSteps.empty() ? sliceSteps[dim] : 1;
+ 
+         const bool is32F = inpMat.depth() == CV_32F;
+ 
+         // TODO optimization is required (for 2D tail case at least)
+         for (int k = begin, j = 0; k < end; k += step, j++)
+         {
+             inpIdx[dim] = k;
+             outIdx[dim] = j;
+ 
+             if (dim + 1 < dimsNum)
+                 getSliceRecursive(inpMat, inpIdx, sliceRanges, sliceSteps, dim + 1, dimsNum, outputs, outIdx);
+             else
+             {
+                 if (is32F)
+                     outputs.at<float>(outIdx.data()) = inpMat.at<float>(inpIdx.data());
+                 else
+                     outputs.at<short>(outIdx.data()) = inpMat.at<short>(inpIdx.data());  // 16F emulation
+             }
+         }
+     }
+ 
   protected:
       // The actual non-negative values determined from @p sliceRanges depends on input size.
       std::vector<std::vector<Range> > finalSliceRanges;
diff --cc modules/dnn/src/onnx/onnx_importer.cpp
Simple merge
diff --cc modules/dnn/test/test_halide_layers.cpp
Simple merge
diff --cc modules/dnn/test/test_layers.cpp
Simple merge
diff --cc modules/dnn/test/test_onnx_importer.cpp
Simple merge
diff --cc modules/dnn/test/test_torch_importer.cpp

index c0d86d93709a6d4c31148df992b974fd64ba1037,8738e5e25cc49eda6c3ca5c310cf2a5a60939f64..f1d636895baad76cdc7d23aef33a154805b39478
--- 1/modules/dnn/test/test_torch_importer.cpp
--- 2/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@@ -253,11 -224,14 +253,19 @@@ TEST_P(Test_Torch_layers, net_conv_gemm
           l1 = 0.046;
           lInf = 0.023;
       }
+ +    else if (target == DNN_TARGET_CUDA_FP16)
+ +    {
+ +        l1 = 0.0042;
+ +        lInf = 0.021;
+ +    }
+     // The OpenCL kernels use the native_ math functions which have
+     // implementation defined accuracy, so we use relaxed thresholds. See
+     // https://github.com/opencv/opencv/issues/9821 for more details.
+     else if (target == DNN_TARGET_OPENCL)
+     {
+         l1 = 0.02;
+         lInf = 0.02;
+     }
       runTorchNet("net_conv_gemm_lrn", "", false, true, true, l1, lInf);
   }
   
diff --cc platforms/winpack_dldt/build_package.py
Simple merge
author	Alexander Alekhin <alexander.a.alekhin@gmail.com>
	Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)
committer	Alexander Alekhin <alexander.a.alekhin@gmail.com>
	Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)
		1	2
modules/core/src/directx.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/src/va_intel.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/include/opencv2/dnn/all_layers.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/src/layers/slice_layer.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/src/onnx/onnx_importer.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/test/test_halide_layers.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/test/test_layers.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/test/test_onnx_importer.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/dnn/test/test_torch_importer.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
platforms/winpack_dldt/build_package.py	patch \|	diff1 \|	diff2 \|	blob \| history