tabs: convert indentation from tabs to spaces (and normalize CRLF line endings) in datamov_utils.hpp
author    Anatoly Baksheev <no@email>
          Thu, 16 Jun 2011 10:33:32 +0000
committer Anatoly Baksheev <no@email>
          Thu, 16 Jun 2011 10:33:32 +0000
modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp

index a705cb6..2602991 100644
 // or tort (including negligence or otherwise) arising in any way out of\r
 // the use of this software, even if advised of the possibility of such damage.\r
 //\r
-//M*/
+//M*/\r
 \r
-#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
-#define __OPENCV_GPU_DATAMOV_UTILS_HPP__
+#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
+#define __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
+\r
+#include "internal_shared.hpp"\r
 \r
-#include "internal_shared.hpp"
-
 namespace cv { namespace gpu { namespace device\r
-{
-    #if __CUDA_ARCH__ >= 200
-
-           // for Fermi memory space is detected automatically
-           template <typename T> struct ForceGlob
-           {
-                   __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)  { val = ptr[offset];  }
-           };
-               
-    #else // __CUDA_ARCH__ >= 200
-
-           #if defined(_WIN64) || defined(__LP64__)            
-                   // 64-bit register modifier for inlined asm
-                   #define _OPENCV_ASM_PTR_ "l"
-           #else       
-                   // 32-bit register modifier for inlined asm
-                   #define _OPENCV_ASM_PTR_ "r"
-           #endif
-
-           template<class T> struct ForceGlob;
-
-        #define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod)                                                   \
-           template <> struct ForceGlob<base_type>                                                                   \
-           {                                                                                                         \
-                   __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val)         \
-            {                                                                                                     \
-                           asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset));       \
-                   }                                                                                                     \
-           };
-        #define DEFINE_FORCE_GLOB_B(base_type, ptx_type)                                                                          \
-           template <> struct ForceGlob<base_type>                                                                                   \
-           {                                                                                                                         \
-                   __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val)                         \
-            {                                                                                                                     \
-                           asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset));   \
-                   }                                                                                                                     \
-           };
-       
-           DEFINE_FORCE_GLOB_B(uchar,  u8)
-           DEFINE_FORCE_GLOB_B(schar,  s8)
-           DEFINE_FORCE_GLOB_B(char,   b8)
-           DEFINE_FORCE_GLOB  (ushort, u16, h)
-           DEFINE_FORCE_GLOB  (short,  s16, h)
-           DEFINE_FORCE_GLOB  (uint,   u32, r)
-           DEFINE_FORCE_GLOB  (int,    s32, r) 
-           DEFINE_FORCE_GLOB  (float,  f32, f) 
-           DEFINE_FORCE_GLOB  (double, f64, d) 
-               
-
-    #undef DEFINE_FORCE_GLOB
-    #undef DEFINE_FORCE_GLOB_B
-    #undef _OPENCV_ASM_PTR_
-       
-    #endif // __CUDA_ARCH__ >= 200
-}}}
-
-#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
+{\r
+    #if __CUDA_ARCH__ >= 200\r
+\r
+        // for Fermi memory space is detected automatically\r
+        template <typename T> struct ForceGlob\r
+        {\r
+            __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)  { val = ptr[offset];  }\r
+        };\r
+            \r
+    #else // __CUDA_ARCH__ >= 200\r
+\r
+        #if defined(_WIN64) || defined(__LP64__)               \r
+            // 64-bit register modifier for inlined asm\r
+            #define _OPENCV_ASM_PTR_ "l"\r
+        #else  \r
+            // 32-bit register modifier for inlined asm\r
+            #define _OPENCV_ASM_PTR_ "r"\r
+        #endif\r
+\r
+        template<class T> struct ForceGlob;\r
+\r
+        #define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod)                                                   \\r
+        template <> struct ForceGlob<base_type>                                                                   \\r
+        {                                                                                                         \\r
+            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val)         \\r
+            {                                                                                                     \\r
+                asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset));       \\r
+            }                                                                                                     \\r
+        };\r
+        #define DEFINE_FORCE_GLOB_B(base_type, ptx_type)                                                                          \\r
+        template <> struct ForceGlob<base_type>                                                                                   \\r
+        {                                                                                                                         \\r
+            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val)                         \\r
+            {                                                                                                                     \\r
+                asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset));   \\r
+            }                                                                                                                     \\r
+        };\r
+        \r
+        DEFINE_FORCE_GLOB_B(uchar,  u8)\r
+        DEFINE_FORCE_GLOB_B(schar,  s8)\r
+        DEFINE_FORCE_GLOB_B(char,   b8)\r
+        DEFINE_FORCE_GLOB  (ushort, u16, h)\r
+        DEFINE_FORCE_GLOB  (short,  s16, h)\r
+        DEFINE_FORCE_GLOB  (uint,   u32, r)\r
+        DEFINE_FORCE_GLOB  (int,    s32, r)    \r
+        DEFINE_FORCE_GLOB  (float,  f32, f)    \r
+        DEFINE_FORCE_GLOB  (double, f64, d)    \r
+            \r
+\r
+    #undef DEFINE_FORCE_GLOB\r
+    #undef DEFINE_FORCE_GLOB_B\r
+    #undef _OPENCV_ASM_PTR_\r
+        \r
+    #endif // __CUDA_ARCH__ >= 200\r
+}}}\r
+\r
+#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
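
For reference, the two macros above generate explicit-PTX specializations of ForceGlob for the pre-Fermi path. Expanding DEFINE_FORCE_GLOB(float, f32, f) on a 64-bit target (where _OPENCV_ASM_PTR_ is "l") yields:

    template <> struct ForceGlob<float>
    {
        __device__ __forceinline__ static void Load(const float* ptr, int offset, float& val)
        {
            // forced global-memory load: ld.global.f32 into a float ("f") register
            asm("ld.global.f32 %0, [%1];" : "=f"(val) : "l"(ptr + offset));
        }
    };

The _B variant exists because PTX inline asm has no 8-bit register constraint: the byte types (uchar, schar, char) are loaded through a 32-bit "=r" register that is aliased onto val via reinterpret_cast<uint*>(&val).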
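
A minimal usage sketch follows; the kernel name and include path are illustrative, not part of this commit. On sm_1x builds the read compiles to an explicit ld.global.f32, while on sm_2x and newer ForceGlob falls back to an ordinary dereference because Fermi detects the memory space automatically.

    #include "opencv2/gpu/device/datamov_utils.hpp"

    using namespace cv::gpu::device;

    // Hypothetical kernel: copy src to dst, forcing each read to come
    // from global memory on pre-Fermi devices.
    __global__ void copy_kernel(const float* src, float* dst, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
        {
            float v;
            ForceGlob<float>::Load(src, i, v);  // v = src[i]
            dst[i] = v;
        }
    }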