Added aligned row allocation to CudaMem: when the allocation type is ALLOC_ZEROCOPY, the row step is rounded up to the device's textureAlignment

author Vladislav Vinogradov <no@email>

Tue, 3 May 2011 09:09:05 +0000 (09:09 +0000)

committer Vladislav Vinogradov <no@email>

Tue, 3 May 2011 09:09:05 +0000 (09:09 +0000)
author Vladislav Vinogradov <no@email>
Tue, 3 May 2011 09:09:05 +0000 (09:09 +0000)
committer Vladislav Vinogradov <no@email>
Tue, 3 May 2011 09:09:05 +0000 (09:09 +0000)
diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp

index dc022f4..51a71dd 100644 (file)
--- a/modules/gpu/src/color.cpp
+++ b/modules/gpu/src/color.cpp
@@ -243,7 +243,7 @@ namespace
                      if (dcn <= 0) dcn = 3;\r
                      CV_Assert((scn == 3 || scn == 4) && (dcn == 3 || dcn == 4));\r
  \r
-                    bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;\r
+                    bidx = code == CV_BGR2YCrCb || code == CV_BGR2YUV ? 0 : 2;\r
  \r
                      static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };\r
                      static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };\r
@@ -281,7 +281,7 @@ namespace
  \r
                      CV_Assert((scn == 3 || scn == 4) && (dcn == 3 || dcn == 4));\r
  \r
-                    bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;\r
+                    bidx = code == CV_YCrCb2BGR || code == CV_YUV2BGR ? 0 : 2;\r
  \r
                      static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };\r
                      static const int yuv_i[] = { 33292, -6472, -9519, 18678 }; \r
@@ -391,9 +391,9 @@ namespace
                          \r
                      dst.create(sz, CV_MAKETYPE(depth, dcn));\r
                      \r
-                    //const void* coeffs = depth == CV_32F ? (void*)coeffs_f : (void*)coeffs_i;\r
+                    const void* coeffs = depth == CV_32F ? (void*)coeffs_f : (void*)coeffs_i;\r
  \r
-                    funcs[depth](src, scn, dst, dcn, coeffs_i, stream);\r
+                    funcs[depth](src, scn, dst, dcn, coeffs, stream);\r
                      break;\r
                  }\r
  \r
diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp

index 52821f9..2a79167 100644 (file)
--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -590,10 +590,21 @@ void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
  bool cv::gpu::CudaMem::canMapHostMemory()\r
  {\r
      cudaDeviceProp prop;\r
-    cudaGetDeviceProperties(&prop, 0);\r
+    cudaGetDeviceProperties(&prop, getDevice());\r
      return (prop.canMapHostMemory != 0) ? true : false;\r
  }\r
  \r
+namespace\r
+{\r
+    int alignUp(int what, int alignment)\r
+    {\r
+        int alignMask = alignment-1;\r
+        int inverseAlignMask = ~alignMask;\r
+        int res = (what + alignMask) & inverseAlignMask;\r
+        return res;\r
+    }\r
+}\r
+\r
  void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)\r
  {\r
      if (_alloc_type == ALLOC_ZEROCOPY && !canMapHostMemory())\r
@@ -611,6 +622,12 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
          rows = _rows;\r
          cols = _cols;\r
          step = elemSize()*cols;\r
+        if (_alloc_type == ALLOC_ZEROCOPY)\r
+        {\r
+            cudaDeviceProp prop;\r
+            cudaGetDeviceProperties(&prop, getDevice());\r
+            step = alignUp(step, prop.textureAlignment);\r
+        }\r
          int64 _nettosize = (int64)step*rows;\r
          size_t nettosize = (size_t)_nettosize;\r
          if( _nettosize != (int64)nettosize )\r
author	Vladislav Vinogradov <no@email>
	Tue, 3 May 2011 09:09:05 +0000 (09:09 +0000)
committer	Vladislav Vinogradov <no@email>
	Tue, 3 May 2011 09:09:05 +0000 (09:09 +0000)
modules/gpu/src/color.cpp		patch \| blob \| history
modules/gpu/src/matrix_operations.cpp		patch \| blob \| history