// or tort (including negligence or otherwise) arising in any way out of\r
// the use of this software, even if advised of the possibility of such damage.\r
//\r
-//M*/
+//M*/\r
\r
-#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
-#define __OPENCV_GPU_DATAMOV_UTILS_HPP__
+#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
+#define __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
+\r
+#include "internal_shared.hpp"\r
\r
-#include "internal_shared.hpp"
-
namespace cv { namespace gpu { namespace device\r
-{
- #if __CUDA_ARCH__ >= 200
-
- // for Fermi memory space is detected automatically
- template <typename T> struct ForceGlob
- {
- __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
- };
-
- #else // __CUDA_ARCH__ >= 200
-
- #if defined(_WIN64) || defined(__LP64__)
- // 64-bit register modifier for inlined asm
- #define _OPENCV_ASM_PTR_ "l"
- #else
- // 32-bit register modifier for inlined asm
- #define _OPENCV_ASM_PTR_ "r"
- #endif
-
- template<class T> struct ForceGlob;
-
- #define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
- template <> struct ForceGlob<base_type> \
- { \
- __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
- { \
- asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
- } \
- };
- #define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
- template <> struct ForceGlob<base_type> \
- { \
- __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
- { \
- asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
- } \
- };
-
- DEFINE_FORCE_GLOB_B(uchar, u8)
- DEFINE_FORCE_GLOB_B(schar, s8)
- DEFINE_FORCE_GLOB_B(char, b8)
- DEFINE_FORCE_GLOB (ushort, u16, h)
- DEFINE_FORCE_GLOB (short, s16, h)
- DEFINE_FORCE_GLOB (uint, u32, r)
- DEFINE_FORCE_GLOB (int, s32, r)
- DEFINE_FORCE_GLOB (float, f32, f)
- DEFINE_FORCE_GLOB (double, f64, d)
-
-
- #undef DEFINE_FORCE_GLOB
- #undef DEFINE_FORCE_GLOB_B
- #undef _OPENCV_ASM_PTR_
-
- #endif // __CUDA_ARCH__ >= 200
-}}}
-
-#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
+{\r
+ #if __CUDA_ARCH__ >= 200\r
+\r
+    // For Fermi (sm_20+), the memory space is detected automatically, so a plain load suffices\r
+ template <typename T> struct ForceGlob\r
+ {\r
+ __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }\r
+ };\r
+ \r
+ #else // __CUDA_ARCH__ >= 200\r
+\r
+ #if defined(_WIN64) || defined(__LP64__) \r
+ // 64-bit register modifier for inlined asm\r
+ #define _OPENCV_ASM_PTR_ "l"\r
+ #else \r
+ // 32-bit register modifier for inlined asm\r
+ #define _OPENCV_ASM_PTR_ "r"\r
+ #endif\r
+\r
+ template<class T> struct ForceGlob;\r
+\r
+ #define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \\r
+ template <> struct ForceGlob<base_type> \\r
+ { \\r
+ __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \\r
+ { \\r
+ asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \\r
+ } \\r
+ };\r
+ #define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \\r
+ template <> struct ForceGlob<base_type> \\r
+ { \\r
+ __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \\r
+ { \\r
+ asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \\r
+ } \\r
+ };\r
+ \r
+ DEFINE_FORCE_GLOB_B(uchar, u8)\r
+ DEFINE_FORCE_GLOB_B(schar, s8)\r
+ DEFINE_FORCE_GLOB_B(char, b8)\r
+ DEFINE_FORCE_GLOB (ushort, u16, h)\r
+ DEFINE_FORCE_GLOB (short, s16, h)\r
+ DEFINE_FORCE_GLOB (uint, u32, r)\r
+ DEFINE_FORCE_GLOB (int, s32, r) \r
+ DEFINE_FORCE_GLOB (float, f32, f) \r
+ DEFINE_FORCE_GLOB (double, f64, d) \r
+ \r
+\r
+ #undef DEFINE_FORCE_GLOB\r
+ #undef DEFINE_FORCE_GLOB_B\r
+ #undef _OPENCV_ASM_PTR_\r
+ \r
+ #endif // __CUDA_ARCH__ >= 200\r
+}}}\r
+\r
+#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
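
For reference, DEFINE_FORCE_GLOB(float, f32, f) in the header above generates a specialization roughly equivalent to the following (hand-expanded here for illustration, not part of the patch; the stringized ptx_type and reg_mod arguments become the PTX opcode suffix and the output register constraint, and _OPENCV_ASM_PTR_ in turn expands to "l" on 64-bit builds or "r" on 32-bit builds):

    template <> struct ForceGlob<float>
    {
        __device__ __forceinline__ static void Load(const float* ptr, int offset, float& val)
        {
            asm("ld.global.f32 %0, [%1];" : "=f"(val) : _OPENCV_ASM_PTR_(ptr + offset));
        }
    };

A minimal usage sketch from device code follows; the kernel name, buffers, and include path are illustrative only and do not appear in the header:

    #include "datamov_utils.hpp"  // illustrative include path

    using namespace cv::gpu::device;

    // Scales src into dst. ForceGlob<float>::Load compiles to a plain global
    // read on sm_20 and newer, and to the inline-PTX ld.global.f32 path on
    // older architectures.
    __global__ void scaleKernel(const float* src, float* dst, int n, float alpha)
    {
        const int idx = blockIdx.x * blockDim.x + threadIdx.x;
        if (idx < n)
        {
            float val;
            ForceGlob<float>::Load(src, idx, val);
            dst[idx] = alpha * val;
        }
    }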