fixed cudev compilation for old pre-Fermi archs

author Vladislav Vinogradov <vlad.vinogradov@itseez.com>

Tue, 17 Sep 2013 13:43:12 +0000 (17:43 +0400)

committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>

Tue, 17 Sep 2013 13:43:12 +0000 (17:43 +0400)
author Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Tue, 17 Sep 2013 13:43:12 +0000 (17:43 +0400)
committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Tue, 17 Sep 2013 13:43:12 +0000 (17:43 +0400)
diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp

index 4c26750..a27955d 100644 (file)
--- a/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
+++ b/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp
@@ -56,6 +56,7 @@ namespace grid_histogram_detail
      template <int BIN_COUNT, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
      __global__ void histogram(const SrcPtr src, ResType* hist, const MaskPtr mask, const int rows, const int cols)
      {
+    #if CV_CUDEV_ARCH >= 120
          __shared__ ResType smem[BIN_COUNT];
  
          const int y = blockIdx.x * blockDim.y + threadIdx.y;
@@ -86,6 +87,7 @@ namespace grid_histogram_detail
              if (histVal > 0)
                  atomicAdd(hist + i, histVal);
          }
+    #endif
      }
  
      template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
diff --git a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp

index b81b57f..ecb1a19 100644 (file)
--- a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp
+++ b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp
@@ -57,6 +57,8 @@ namespace cv { namespace cudev {
  template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
  __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
  {
+    CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
      const int rows = getRows(src);
      const int cols = getCols(src);
  
@@ -75,6 +77,8 @@ __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const Mas
  template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType>
  __host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
  {
+    CV_Assert( deviceSupports(SHARED_ATOMICS) );
+
      const int rows = getRows(src);
      const int cols = getCols(src);
  
diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp

index b01a2c7..095864f 100644 (file)
--- a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
@@ -52,6 +52,40 @@
  #include "gpumat.hpp"
  #include "traits.hpp"
  
+namespace
+{
+    template <typename T> struct CvCudevTextureRef
+    {
+        typedef texture<T, cudaTextureType2D, cudaReadModeElementType> TexRef;
+
+        static TexRef ref;
+
+        __host__ static void bind(const cv::cudev::GlobPtrSz<T>& mat,
+                                  bool normalizedCoords = false,
+                                  cudaTextureFilterMode filterMode = cudaFilterModePoint,
+                                  cudaTextureAddressMode addressMode = cudaAddressModeClamp)
+        {
+            ref.normalized = normalizedCoords;
+            ref.filterMode = filterMode;
+            ref.addressMode[0] = addressMode;
+            ref.addressMode[1] = addressMode;
+            ref.addressMode[2] = addressMode;
+
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+
+            CV_CUDEV_SAFE_CALL( cudaBindTexture2D(0, &ref, mat.data, &desc, mat.cols, mat.rows, mat.step) );
+        }
+
+        __host__ static void unbind()
+        {
+            CV_CUDEV_SAFE_CALL( cudaUnbindTexture(ref) );
+        }
+    };
+
+    template <typename T>
+    typename CvCudevTextureRef<T>::TexRef CvCudevTextureRef<T>::ref;
+}
+
  namespace cv { namespace cudev {
  
  template <typename T> struct TexturePtr
@@ -63,79 +97,73 @@ template <typename T> struct TexturePtr
  
      __device__ __forceinline__ T operator ()(float y, float x) const
      {
+    #if CV_CUDEV_ARCH < 300
+        // Use the texture reference
+        return tex2D(CvCudevTextureRef<T>::ref, x, y);
+    #else
+        // Use the texture object
          return tex2D<T>(texObj, x, y);
+    #endif
      }
  };
  
  template <typename T> struct Texture : TexturePtr<T>
  {
      int rows, cols;
+    bool cc30;
  
      __host__ explicit Texture(const GlobPtrSz<T>& mat,
                                bool normalizedCoords = false,
                                cudaTextureFilterMode filterMode = cudaFilterModePoint,
                                cudaTextureAddressMode addressMode = cudaAddressModeClamp)
      {
-        CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );
-
-        rows = mat.rows;
-        cols = mat.cols;
-
-        cudaResourceDesc texRes;
-        std::memset(&texRes, 0, sizeof(texRes));
-        texRes.resType = cudaResourceTypePitch2D;
-        texRes.res.pitch2D.devPtr = mat.data;
-        texRes.res.pitch2D.height = mat.rows;
-        texRes.res.pitch2D.width = mat.cols;
-        texRes.res.pitch2D.pitchInBytes = mat.step;
-        texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
-
-        cudaTextureDesc texDescr;
-        std::memset(&texDescr, 0, sizeof(texDescr));
-        texDescr.addressMode[0] = addressMode;
-        texDescr.addressMode[1] = addressMode;
-        texDescr.addressMode[2] = addressMode;
-        texDescr.filterMode = filterMode;
-        texDescr.readMode = cudaReadModeElementType;
-        texDescr.normalizedCoords = normalizedCoords;
-
-        CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
-    }
-
-    __host__ explicit Texture(const GpuMat_<T>& mat,
-                              bool normalizedCoords = false,
-                              cudaTextureFilterMode filterMode = cudaFilterModePoint,
-                              cudaTextureAddressMode addressMode = cudaAddressModeClamp)
-    {
-        CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) );
+        cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);
  
          rows = mat.rows;
          cols = mat.cols;
  
-        cudaResourceDesc texRes;
-        std::memset(&texRes, 0, sizeof(texRes));
-        texRes.resType = cudaResourceTypePitch2D;
-        texRes.res.pitch2D.devPtr = mat.data;
-        texRes.res.pitch2D.height = mat.rows;
-        texRes.res.pitch2D.width = mat.cols;
-        texRes.res.pitch2D.pitchInBytes = mat.step;
-        texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
-
-        cudaTextureDesc texDescr;
-        std::memset(&texDescr, 0, sizeof(texDescr));
-        texDescr.addressMode[0] = addressMode;
-        texDescr.addressMode[1] = addressMode;
-        texDescr.addressMode[2] = addressMode;
-        texDescr.filterMode = filterMode;
-        texDescr.readMode = cudaReadModeElementType;
-        texDescr.normalizedCoords = normalizedCoords;
-
-        CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
+        if (cc30)
+        {
+            // Use the texture object
+            cudaResourceDesc texRes;
+            std::memset(&texRes, 0, sizeof(texRes));
+            texRes.resType = cudaResourceTypePitch2D;
+            texRes.res.pitch2D.devPtr = mat.data;
+            texRes.res.pitch2D.height = mat.rows;
+            texRes.res.pitch2D.width = mat.cols;
+            texRes.res.pitch2D.pitchInBytes = mat.step;
+            texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>();
+
+            cudaTextureDesc texDescr;
+            std::memset(&texDescr, 0, sizeof(texDescr));
+            texDescr.normalizedCoords = normalizedCoords;
+            texDescr.filterMode = filterMode;
+            texDescr.addressMode[0] = addressMode;
+            texDescr.addressMode[1] = addressMode;
+            texDescr.addressMode[2] = addressMode;
+            texDescr.readMode = cudaReadModeElementType;
+
+            CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) );
+        }
+        else
+        {
+            // Use the texture reference
+            CvCudevTextureRef<T>::bind(mat, normalizedCoords, filterMode, addressMode);
+        }
      }
  
      __host__ ~Texture()
      {
-        cudaDestroyTextureObject(this->texObj);
+        if (cc30)
+        {
+            // Use the texture object
+            cudaDestroyTextureObject(this->texObj);
+        }
+        else
+        {
+            // Use the texture reference
+            CvCudevTextureRef<T>::unbind();
+        }
      }
  };
  
diff --git a/modules/cudev/include/opencv2/cudev/util/atomic.hpp b/modules/cudev/include/opencv2/cudev/util/atomic.hpp

index f650c68..2da1102 100644 (file)
--- a/modules/cudev/include/opencv2/cudev/util/atomic.hpp
+++ b/modules/cudev/include/opencv2/cudev/util/atomic.hpp
@@ -64,11 +64,23 @@ __device__ __forceinline__ uint atomicAdd(uint* address, uint val)
  
  __device__ __forceinline__ float atomicAdd(float* address, float val)
  {
+#if CV_CUDEV_ARCH >= 200
      return ::atomicAdd(address, val);
+#else
+    int* address_as_i = (int*) address;
+    int old = *address_as_i, assumed;
+    do {
+        assumed = old;
+        old = ::atomicCAS(address_as_i, assumed,
+            __float_as_int(val + __int_as_float(assumed)));
+    } while (assumed != old);
+    return __int_as_float(old);
+#endif
  }
  
  __device__ static double atomicAdd(double* address, double val)
  {
+#if CV_CUDEV_ARCH >= 130
      unsigned long long int* address_as_ull = (unsigned long long int*) address;
      unsigned long long int old = *address_as_ull, assumed;
      do {
@@ -77,6 +89,11 @@ __device__ static double atomicAdd(double* address, double val)
              __double_as_longlong(val + __longlong_as_double(assumed)));
      } while (assumed != old);
      return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
  }
  
  // atomicMin
@@ -93,6 +110,7 @@ __device__ __forceinline__ uint atomicMin(uint* address, uint val)
  
  __device__ static float atomicMin(float* address, float val)
  {
+#if CV_CUDEV_ARCH >= 120
      int* address_as_i = (int*) address;
      int old = *address_as_i, assumed;
      do {
@@ -101,10 +119,16 @@ __device__ static float atomicMin(float* address, float val)
              __float_as_int(::fminf(val, __int_as_float(assumed))));
      } while (assumed != old);
      return __int_as_float(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0f;
+#endif
  }
  
  __device__ static double atomicMin(double* address, double val)
  {
+#if CV_CUDEV_ARCH >= 130
      unsigned long long int* address_as_ull = (unsigned long long int*) address;
      unsigned long long int old = *address_as_ull, assumed;
      do {
@@ -113,6 +137,11 @@ __device__ static double atomicMin(double* address, double val)
              __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
      } while (assumed != old);
      return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
  }
  
  // atomicMax
@@ -129,6 +158,7 @@ __device__ __forceinline__ uint atomicMax(uint* address, uint val)
  
  __device__ static float atomicMax(float* address, float val)
  {
+#if CV_CUDEV_ARCH >= 120
      int* address_as_i = (int*) address;
      int old = *address_as_i, assumed;
      do {
@@ -137,10 +167,16 @@ __device__ static float atomicMax(float* address, float val)
              __float_as_int(::fmaxf(val, __int_as_float(assumed))));
      } while (assumed != old);
      return __int_as_float(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0f;
+#endif
  }
  
  __device__ static double atomicMax(double* address, double val)
  {
+#if CV_CUDEV_ARCH >= 130
      unsigned long long int* address_as_ull = (unsigned long long int*) address;
      unsigned long long int old = *address_as_ull, assumed;
      do {
@@ -149,6 +185,11 @@ __device__ static double atomicMax(double* address, double val)
              __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
      } while (assumed != old);
      return __longlong_as_double(old);
+#else
+    (void) address;
+    (void) val;
+    return 0.0;
+#endif
  }
  
  }}
diff --git a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp

index faa12e3..ff7ce85 100644 (file)
--- a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
+++ b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp
@@ -228,7 +228,11 @@ template <> __device__ __forceinline__ int saturate_cast<int>(float v)
  }
  template <> __device__ __forceinline__ int saturate_cast<int>(double v)
  {
+#if CV_CUDEV_ARCH >= 130
      return __double2int_rn(v);
+#else
+    return saturate_cast<int>((float) v);
+#endif
  }
  
  template <> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
@@ -256,7 +260,11 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(float v)
  }
  template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
  {
+#if CV_CUDEV_ARCH >= 130
      return __double2uint_rn(v);
+#else
+    return saturate_cast<uint>((float) v);
+#endif
  }
  
  }}
author	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Tue, 17 Sep 2013 13:43:12 +0000 (17:43 +0400)
committer	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Tue, 17 Sep 2013 13:43:12 +0000 (17:43 +0400)
modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp		patch \| blob \| history
modules/cudev/include/opencv2/cudev/grid/histogram.hpp		patch \| blob \| history
modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp		patch \| blob \| history
modules/cudev/include/opencv2/cudev/util/atomic.hpp		patch \| blob \| history
modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp		patch \| blob \| history