Merge remote-tracking branch 'upstream/3.4' into merge-3.4
author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sat, 27 Mar 2021 15:35:16 +0000 (15:35 +0000)
1  2 
modules/core/src/directx.cpp
modules/core/src/va_intel.cpp
modules/dnn/include/opencv2/dnn/all_layers.hpp
modules/dnn/src/layers/slice_layer.cpp
modules/dnn/src/onnx/onnx_importer.cpp
modules/dnn/test/test_halide_layers.cpp
modules/dnn/test/test_layers.cpp
modules/dnn/test/test_onnx_importer.cpp
modules/dnn/test/test_torch_importer.cpp
platforms/winpack_dldt/build_package.py

Simple merge
@@@ -202,10 -171,10 +202,10 @@@ static bool ocl_convert_bgr_to_nv12(cl_
  
      k.args(clBuffer, step, cols, rows, clImageY, clImageUV);
  
-     size_t globalsize[] = { (size_t)cols, (size_t)rows };
+     size_t globalsize[] = { (size_t)cols/2, (size_t)rows/2 };
      return k.run(2, globalsize, 0, false);
  }
 -#endif // HAVE_VA_INTEL && HAVE_OPENCL
 +#endif // HAVE_VA_INTEL
  
  } // namespace cv::va_intel::ocl
  
@@@ -130,10 -141,9 +147,13 @@@ public
  #endif
  #ifdef HAVE_DNN_NGRAPH
          if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-             return sliceRanges.size() == 1;
+             return sliceRanges.size() == 1 && !hasSteps;
+ #endif
++#ifdef HAVE_CUDA
++        if (backendId == DNN_BACKEND_CUDA)
++            return !hasSteps;
 +#endif
-         return backendId == DNN_BACKEND_OPENCV ||
-                backendId == DNN_BACKEND_CUDA;
+         return backendId == DNN_BACKEND_OPENCV;
      }
  
      bool getMemoryShapes(const std::vector<MatShape> &inputs,
      }
  #endif  // HAVE_DNN_NGRAPH
  
 +
 +#ifdef HAVE_CUDA
 +    Ptr<BackendNode> initCUDA(
 +        void *context_,
 +        const std::vector<Ptr<BackendWrapper>>& inputs,
 +        const std::vector<Ptr<BackendWrapper>>& outputs
 +    ) override
 +    {
 +        auto context = reinterpret_cast<csl::CSLContext*>(context_);
 +
 +        std::vector<std::vector<std::size_t>> offsets;
 +        for (const auto& ranges : finalSliceRanges)
 +        {
 +            std::vector<std::size_t> offsets_i;
 +            for (const auto& range : ranges)
 +                offsets_i.push_back(range.start);
 +            offsets.push_back(std::move(offsets_i));
 +        }
 +
 +        return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
 +    }
 +#endif
 +
 +
+ private:
+     void getSliceRecursive(const Mat &inpMat, std::vector<int> &inpIdx,
+                            const std::vector<Range> &sliceRanges,
+                            const std::vector<int> &sliceSteps, int dim, int dimsNum,
+                            Mat &outputs, std::vector<int> &outIdx)
+     {
+         int begin = sliceRanges[dim].start;
+         int end = sliceRanges[dim].end;
+         int step = !sliceSteps.empty() ? sliceSteps[dim] : 1;
+         const bool is32F = inpMat.depth() == CV_32F;
+         // TODO optimization is required (for 2D tail case at least)
+         for (int k = begin, j = 0; k < end; k += step, j++)
+         {
+             inpIdx[dim] = k;
+             outIdx[dim] = j;
+             if (dim + 1 < dimsNum)
+                 getSliceRecursive(inpMat, inpIdx, sliceRanges, sliceSteps, dim + 1, dimsNum, outputs, outIdx);
+             else
+             {
+                 if (is32F)
+                     outputs.at<float>(outIdx.data()) = inpMat.at<float>(inpIdx.data());
+                 else
+                     outputs.at<short>(outIdx.data()) = inpMat.at<short>(inpIdx.data());  // 16F emulation
+             }
+         }
+     }
  protected:
      // The actual non-negative values determined from @p sliceRanges depends on input size.
      std::vector<std::vector<Range> > finalSliceRanges;
Simple merge
@@@ -253,11 -224,14 +253,19 @@@ TEST_P(Test_Torch_layers, net_conv_gemm
          l1 = 0.046;
          lInf = 0.023;
      }
 +    else if (target == DNN_TARGET_CUDA_FP16)
 +    {
 +        l1 = 0.0042;
 +        lInf = 0.021;
 +    }
+     // The OpenCL kernels use the native_ math functions which have
+     // implementation defined accuracy, so we use relaxed thresholds. See
+     // https://github.com/opencv/opencv/issues/9821 for more details.
+     else if (target == DNN_TARGET_OPENCL)
+     {
+         l1 = 0.02;
+         lInf = 0.02;
+     }
      runTorchNet("net_conv_gemm_lrn", "", false, true, true, l1, lInf);
  }