CUDA kernels interface

author marina.kolpakova <marina.kolpakova@itseez.com>

Fri, 21 Sep 2012 12:12:18 +0000 (16:12 +0400)

committer marina.kolpakova <marina.kolpakova@itseez.com>

Sat, 10 Nov 2012 01:02:37 +0000 (05:02 +0400)
author marina.kolpakova <marina.kolpakova@itseez.com>
Fri, 21 Sep 2012 12:12:18 +0000 (16:12 +0400)
committer marina.kolpakova <marina.kolpakova@itseez.com>
Sat, 10 Nov 2012 01:02:37 +0000 (05:02 +0400)
diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu

index a6418c1..89a74ee 100644 (file)
--- a/modules/gpu/src/cuda/isf-sc.cu
+++ b/modules/gpu/src/cuda/isf-sc.cu
@@ -42,13 +42,90 @@
  
  #include <icf.hpp>
  
-void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv) const
+namespace cv { namespace gpu {
+
+
+ namespace device {
+
+__global__ void rgb2grayluv(const uchar3* __restrict__ rgb, uchar* __restrict__ hog,
+                            const int rgbPitch, const int hogPitch)
+{
+}
+
+__global__ void gray2hog(const uchar* __restrict__ gray, uchar* __restrict__ hog,
+                         const int pitch)
+{
+}
+
+__global__ void decimate(const uchar* __restrict__ hogluv, uchar* __restrict__ shrank,
+                        const int inPitch, const int outPitch )
+{
+}
+
+__global__ void intRow(const uchar* __restrict__ hogluv, ushort* __restrict__ sum,
+                       const int inPitch, const int outPitch)
+{
+
+}
+
+__global__ void intCol(ushort* __restrict__ sum, const int pitch)
+{
+
+}
+
+
+__global__ void detect(const cv::gpu::icf::Cascade cascade, const uchar* __restrict__ hogluv, const int pitch)
+{
+    cascade.detectAt();
+}
+
+}
+
+void __device icf::Cascade::detectAt() const
+{
+
+}
+
+void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const
  {
      // detection kernel
+
  }
  
-void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar4>& image)
+void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStream_t stream)
  {
      // color convertin kernel
+    dim3 block(32, 8);
+    dim3 grid(FRAME_WIDTH / 32, FRAME_HEIGHT / 8);
+
+    uchar * channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_BINS);
+    device::rgb2grayluv<<<grid, block, 0, stream>>>((uchar3*)rgb.ptr(), channels, rgb.step, dmem.step);
+    cudaSafeCall( cudaGetLastError());
+
      // hog calculation kernel
-}
-\ No newline at end of file
+    channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_LUV_BINS);
+    device::gray2hog<<<grid, block, 0, stream>>>(channels, (uchar*)dmem.ptr(), dmem.step);
+    cudaSafeCall( cudaGetLastError() );
+
+    const int shrWidth  = FRAME_WIDTH / shrinkage;
+    const int shrHeight = FRAME_HEIGHT / shrinkage;
+
+    // decimate kernel
+    grid = dim3(shrWidth / 32, shrHeight / 8);
+    device::decimate<<<grid, block, 0, stream>>>((uchar*)dmem.ptr(), (uchar*)shrunk.ptr(), dmem.step, shrunk.step);
+    cudaSafeCall( cudaGetLastError() );
+
+    // integrate rows
+    block = dim3(shrWidth, 1);
+    grid = dim3(shrHeight * HOG_LUV_BINS, 1);
+    device::intRow<<<grid, block, 0, stream>>>((uchar*)shrunk.ptr(), (ushort*)hogluv.ptr(), shrunk.step, hogluv.step);
+    cudaSafeCall( cudaGetLastError() );
+
+    // integrate cols
+    block = dim3(128, 1);
+    grid = dim3(shrWidth * HOG_LUV_BINS, 1);
+    device::intCol<<<grid, block, 0, stream>>>((ushort*)hogluv.ptr(), hogluv.step);
+    cudaSafeCall( cudaGetLastError() );
+}
+
+}}
+\ No newline at end of file
diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp

index 7183fc0..8b075be 100644 (file)
--- a/modules/gpu/src/icf.hpp
+++ b/modules/gpu/src/icf.hpp
@@ -46,17 +46,19 @@
  #define __OPENCV_ICF_HPP__
  
  #if defined __CUDACC__
-# define __hd__ __host__ __device__ __forceinline__
+# define __device __device__ __forceinline__
  #else
-# define __hd__
+# define __device
  #endif
  
  
-namespace icf {
+namespace cv { namespace gpu { namespace icf {
  
  using cv::gpu::PtrStepSzb;
  using cv::gpu::PtrStepSzf;
  
+typedef unsigned char uchar;
+
  struct Cascade
  {
      Cascade() {}
@@ -64,7 +66,8 @@ struct Cascade
          const cv::gpu::PtrStepSzf& lvs, const cv::gpu::PtrStepSzb& fts, const cv::gpu::PtrStepSzb& lls)
      : octaves(octs), stages(sts), nodes(nds), leaves(lvs), features(fts), levels(lls) {}
  
-    void detect(const cv::gpu::PtrStepSzb& hogluv) const;
+    void detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const;
+    void __device detectAt() const;
  
      PtrStepSzb octaves;
      PtrStepSzf stages;
@@ -83,12 +86,24 @@ struct ChannelStorage
          const cv::gpu::PtrStepSzb& itg, const int s)
      : dmem (buff), shrunk(shr), hogluv(itg), shrinkage(s) {}
  
-    void frame(const cv::gpu::PtrStepSz<uchar4>& image);
+    void frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStream_t stream);
  
      PtrStepSzb dmem;
      PtrStepSzb shrunk;
      PtrStepSzb hogluv;
  
+    enum
+    {
+        FRAME_WIDTH        = 640,
+        FRAME_HEIGHT       = 480,
+        TOTAL_SCALES       = 55,
+        CLASSIFIERS        = 5,
+        ORIG_OBJECT_WIDTH  = 64,
+        ORIG_OBJECT_HEIGHT = 128,
+        HOG_BINS           = 6,
+        HOG_LUV_BINS       = 10
+    };
+
      int shrinkage;
  };
  
@@ -143,6 +158,6 @@ struct __align__(8) Level //is actually 24 bytes
          objSize.y  = round(oct.size.y * relScale);
      }
  };
-}
+}}}
  
  #endif
 \ No newline at end of file
diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp

index fb36efd..b2419c1 100644 (file)
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -100,9 +100,9 @@ struct cv::gpu::SoftCascade::Filds
      };
  
      bool fill(const FileNode &root, const float mins, const float maxs);
-    void detect() const
+    void detect(cudaStream_t stream) const
      {
-        cascade.detect(hogluv);
+        cascade.detect(hogluv, stream);
      }
  
  private:
@@ -394,18 +394,20 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
  }
  
  void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& image, const GpuMat& /*rois*/,
-                                GpuMat& /*objects*/, const int /*rejectfactor*/, Stream /*stream*/)
+                                GpuMat& /*objects*/, const int /*rejectfactor*/, Stream s)
  {
      // only color images are supperted
-    CV_Assert(image.type() == CV_8UC4);
+    CV_Assert(image.type() == CV_8UC3);
  
      // only this window size allowed
      CV_Assert(image.cols == 640 && image.rows == 480);
  
      Filds& flds = *filds;
  
-    flds.storage.frame(image);
-    flds.detect();
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    flds.storage.frame(image, stream);
+    flds.detect(stream);
  }
  
  #endif
 \ No newline at end of file
author	marina.kolpakova <marina.kolpakova@itseez.com>
	Fri, 21 Sep 2012 12:12:18 +0000 (16:12 +0400)
committer	marina.kolpakova <marina.kolpakova@itseez.com>
	Sat, 10 Nov 2012 01:02:37 +0000 (05:02 +0400)
modules/gpu/src/cuda/isf-sc.cu		patch \| blob \| history
modules/gpu/src/icf.hpp		patch \| blob \| history
modules/gpu/src/softcascade.cpp		patch \| blob \| history