From 652fb1212e6ec4cb6fd9f73ee3f23224f5cb4d83 Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Mon, 6 Dec 2010 16:37:32 +0000 Subject: [PATCH] module reorganization: added folder with pure device functions, cuda_shared.hpp renamed to internal_shared.hpp --- modules/gpu/CMakeLists.txt | 278 +++--- modules/gpu/include/opencv2/gpu/devmem2d.hpp | 11 +- ...efpropagation_gpu.cpp => beliefpropagation.cpp} | 0 modules/gpu/src/border_interpolate.cpp | 63 -- modules/gpu/src/border_interpolate.hpp | 56 -- ...constantspacebp_gpu.cpp => constantspacebp.cpp} | 47 +- modules/gpu/src/cuda/beliefpropagation.cu | 15 +- modules/gpu/src/cuda/border_interpolate.hpp | 179 ---- modules/gpu/src/cuda/brute_force_matcher.cu | 7 +- modules/gpu/src/cuda/color.cu | 9 +- modules/gpu/src/cuda/constantspacebp.cu | 132 +-- modules/gpu/src/cuda/filters.cu | 8 +- modules/gpu/src/cuda/hog.cu | 2 +- modules/gpu/src/cuda/imgproc.cu | 14 +- .../cuda/{cuda_shared.hpp => internal_shared.hpp} | 19 +- modules/gpu/src/cuda/linear_filters_beta.cu | 6 +- modules/gpu/src/cuda/match_template.cu | 2 +- modules/gpu/src/cuda/mathfunc.cu | 6 +- modules/gpu/src/cuda/matrix_operations.cu | 6 +- modules/gpu/src/cuda/saturate_cast.hpp | 169 ---- modules/gpu/src/cuda/split_merge.cu | 2 +- modules/gpu/src/cuda/stereobm.cu | 2 +- modules/gpu/src/cuda/transform.hpp | 2 +- modules/gpu/src/cuda/vecmath.hpp | 936 -------------------- modules/gpu/src/imgproc_gpu.cpp | 22 +- modules/gpu/src/internal_shared.hpp | 57 -- .../src/opencv2/gpu/device/border_interpolate.hpp | 176 ++++ .../{cuda => opencv2/gpu/device}/dynamic_smem.hpp | 0 .../{cuda => opencv2/gpu/device}/limits_gpu.hpp | 2 +- .../gpu/src/opencv2/gpu/device/saturate_cast.hpp | 172 ++++ modules/gpu/src/opencv2/gpu/device/vecmath.hpp | 939 +++++++++++++++++++++ modules/gpu/src/precomp.hpp | 2 +- modules/gpu/src/{stereobm_gpu.cpp => stereobm.cpp} | 0 33 files changed, 1579 insertions(+), 1762 deletions(-) rename modules/gpu/src/{beliefpropagation_gpu.cpp => beliefpropagation.cpp} (100%) delete mode 100644 modules/gpu/src/border_interpolate.cpp delete mode 100644 modules/gpu/src/border_interpolate.hpp rename modules/gpu/src/{constantspacebp_gpu.cpp => constantspacebp.cpp} (79%) delete mode 100644 modules/gpu/src/cuda/border_interpolate.hpp rename modules/gpu/src/cuda/{cuda_shared.hpp => internal_shared.hpp} (87%) delete mode 100644 modules/gpu/src/cuda/saturate_cast.hpp delete mode 100644 modules/gpu/src/cuda/vecmath.hpp delete mode 100644 modules/gpu/src/internal_shared.hpp create mode 100644 modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp rename modules/gpu/src/{cuda => opencv2/gpu/device}/dynamic_smem.hpp (100%) rename modules/gpu/src/{cuda => opencv2/gpu/device}/limits_gpu.hpp (97%) create mode 100644 modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp create mode 100644 modules/gpu/src/opencv2/gpu/device/vecmath.hpp rename modules/gpu/src/{stereobm_gpu.cpp => stereobm.cpp} (100%) diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index f41817b..239b06a 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -1,134 +1,144 @@ - -set(name "gpu") -set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann") - -set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu) - -set(the_target "opencv_${name}") - -project(${the_target}) - -add_definitions(-DCVAPI_EXPORTS) - -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" - "${CMAKE_CURRENT_SOURCE_DIR}/src/cuda" - "${CMAKE_CURRENT_SOURCE_DIR}/src" - "${CMAKE_CURRENT_BINARY_DIR}") - -foreach(d ${DEPS}) - if(${d} MATCHES "opencv_") - string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d}) - include_directories("${d_dir}/include") - endif() -endforeach() - -file(GLOB lib_srcs "src/*.cpp") -file(GLOB lib_int_hdrs "src/*.h*") -file(GLOB lib_cuda "src/cuda/*.cu*") -file(GLOB lib_cuda_hdrs "src/cuda/*.h*") -source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs}) -source_group("Src\\Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs}) - -file(GLOB lib_hdrs "include/opencv2/${name}/*.h*") -source_group("Include" FILES ${lib_hdrs}) - -if (HAVE_CUDA) - get_filename_component(_path_to_findnpp "${CMAKE_CURRENT_LIST_FILE}" PATH) - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp}) - find_package(NPP 3.2.16 REQUIRED) - message(STATUS "NPP detected: " ${NPP_VERSION}) - - include_directories(${CUDA_INCLUDE_DIRS} ${CUDA_NPP_INCLUDES}) - - if (UNIX OR APPLE) - set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fPIC;") - #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-fPIC") - endif() - - #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-keep") - #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;") - - string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") - string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") - - if(MSVC) - #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") - #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408") - - string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") - string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") - string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") - string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") - endif() - - CUDA_COMPILE(cuda_objs ${lib_cuda}) - #CUDA_BUILD_CLEAN_TARGET() -endif() - - -add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${cuda_objs}) - - -if(PCHSupport_FOUND) - set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp) - if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*") - if(${CMAKE_GENERATOR} MATCHES "Visual*") - set(${the_target}_pch "src/precomp.cpp") - endif() - add_native_precompiled_header(${the_target} ${pch_header}) - elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles") - add_precompiled_header(${the_target} ${pch_header}) - endif() -endif() - -# For dynamic link numbering convenions -set_target_properties(${the_target} PROPERTIES - VERSION ${OPENCV_VERSION} - SOVERSION ${OPENCV_SOVERSION} - OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}" - ) - -# Additional target properties -set_target_properties(${the_target} PROPERTIES - DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" - ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/" - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/" - INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib" - ) - -# Add the required libraries for linking: -target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS}) - -if (HAVE_CUDA) - target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES}) -endif() - -if(MSVC) - if(CMAKE_CROSSCOMPILING) - set_target_properties(${the_target} PROPERTIES - LINK_FLAGS "/NODEFAULTLIB:secchk" - ) - endif() - set_target_properties(${the_target} PROPERTIES - LINK_FLAGS "/NODEFAULTLIB:libc" - ) -endif() - -# Dependencies of this target: -add_dependencies(${the_target} ${DEPS}) - -install(TARGETS ${the_target} - RUNTIME DESTINATION bin COMPONENT main - LIBRARY DESTINATION lib COMPONENT main - ARCHIVE DESTINATION lib COMPONENT main) - -install(FILES ${lib_hdrs} - DESTINATION include/opencv2/${name} - COMPONENT main) - - + +set(name "gpu") + +#"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed +set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann") + +set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu) + +set(the_target "opencv_${name}") + +project(${the_target}) + +add_definitions(-DCVAPI_EXPORTS) + +include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/src/cuda" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_BINARY_DIR}") + +foreach(d ${DEPS}) + if(${d} MATCHES "opencv_") + string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d}) + include_directories("${d_dir}/include") + endif() +endforeach() + +file(GLOB lib_srcs "src/*.cpp") +file(GLOB lib_int_hdrs "src/*.h*") +file(GLOB lib_cuda "src/cuda/*.cu*") +file(GLOB lib_cuda_hdrs "src/cuda/*.h*") +source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs}) +source_group("Src\\Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs}) + +file(GLOB lib_hdrs "include/opencv2/${name}/*.h*") +source_group("Include" FILES ${lib_hdrs}) + +#file(GLOB lib_device_hdrs "include/opencv2/${name}/device/*.h*") +file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*") +source_group("Device" FILES ${lib_device_hdrs}) + +if (HAVE_CUDA) + get_filename_component(_path_to_findnpp "${CMAKE_CURRENT_LIST_FILE}" PATH) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp}) + find_package(NPP 3.2.16 REQUIRED) + message(STATUS "NPP detected: " ${NPP_VERSION}) + + include_directories(${CUDA_INCLUDE_DIRS} ${CUDA_NPP_INCLUDES}) + + if (UNIX OR APPLE) + set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fPIC;") + #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-fPIC") + endif() + + #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-keep") + #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;") + + string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") + + if(MSVC) + #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408") + + string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") + string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") + endif() + + CUDA_COMPILE(cuda_objs ${lib_cuda}) + #CUDA_BUILD_CLEAN_TARGET() +endif() + + +add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${cuda_objs}) + + +if(PCHSupport_FOUND) + set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp) + if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*") + if(${CMAKE_GENERATOR} MATCHES "Visual*") + set(${the_target}_pch "src/precomp.cpp") + endif() + add_native_precompiled_header(${the_target} ${pch_header}) + elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles") + add_precompiled_header(${the_target} ${pch_header}) + endif() +endif() + +# For dynamic link numbering convenions +set_target_properties(${the_target} PROPERTIES + VERSION ${OPENCV_VERSION} + SOVERSION ${OPENCV_SOVERSION} + OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}" + ) + +# Additional target properties +set_target_properties(${the_target} PROPERTIES + DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/" + INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib" + ) + +# Add the required libraries for linking: +target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS}) + +if (HAVE_CUDA) + target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES}) +endif() + +if(MSVC) + if(CMAKE_CROSSCOMPILING) + set_target_properties(${the_target} PROPERTIES + LINK_FLAGS "/NODEFAULTLIB:secchk" + ) + endif() + set_target_properties(${the_target} PROPERTIES + LINK_FLAGS "/NODEFAULTLIB:libc" + ) +endif() + +# Dependencies of this target: +add_dependencies(${the_target} ${DEPS}) + +install(TARGETS ${the_target} + RUNTIME DESTINATION bin COMPONENT main + LIBRARY DESTINATION lib COMPONENT main + ARCHIVE DESTINATION lib COMPONENT main) + +install(FILES ${lib_hdrs} + DESTINATION include/opencv2/${name} + COMPONENT main) + +#install(FILES ${lib_device_hdrs} +# DESTINATION include/opencv2/${name}/device +# COMPONENT main) + + diff --git a/modules/gpu/include/opencv2/gpu/devmem2d.hpp b/modules/gpu/include/opencv2/gpu/devmem2d.hpp index e756e2e..7341e2f 100644 --- a/modules/gpu/include/opencv2/gpu/devmem2d.hpp +++ b/modules/gpu/include/opencv2/gpu/devmem2d.hpp @@ -55,6 +55,8 @@ namespace cv #else #define __CV_GPU_HOST_DEVICE__ #endif + template struct StaticAssert; + template <> struct StaticAssert {static __CV_GPU_HOST_DEVICE__ void check(){}}; template struct DevMem2D_ { @@ -96,19 +98,18 @@ namespace cv __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step); } }; - template struct StaticCheck; - template <> struct StaticCheck{}; + template struct PtrElemStep_ : public PtrStep_ { PtrElemStep_(const DevMem2D_& mem) : PtrStep_(mem) { + StaticAssert<256 % sizeof(T) == 0>::check(); + PtrStep_::step /= PtrStep_::elem_size; } __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep_::data + y * PtrStep_::step; } - __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_::data + y * PtrStep_::step; } - private: - StaticCheck<256 % sizeof(T) == 0> ElemStepTypeCheck; + __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_::data + y * PtrStep_::step; } }; typedef DevMem2D_ DevMem2D; diff --git a/modules/gpu/src/beliefpropagation_gpu.cpp b/modules/gpu/src/beliefpropagation.cpp similarity index 100% rename from modules/gpu/src/beliefpropagation_gpu.cpp rename to modules/gpu/src/beliefpropagation.cpp diff --git a/modules/gpu/src/border_interpolate.cpp b/modules/gpu/src/border_interpolate.cpp deleted file mode 100644 index a162a2e..0000000 --- a/modules/gpu/src/border_interpolate.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "internal_shared.hpp" -#include "border_interpolate.hpp" -#include "opencv2/gpu/gpu.hpp" - - -bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType) -{ - if (cpuBorderType == cv::BORDER_REFLECT101) - { - gpuBorderType = cv::gpu::BORDER_REFLECT101; - return true; - } - - if (cpuBorderType == cv::BORDER_REPLICATE) - { - gpuBorderType = cv::gpu::BORDER_REPLICATE; - return true; - } - - return false; -} \ No newline at end of file diff --git a/modules/gpu/src/border_interpolate.hpp b/modules/gpu/src/border_interpolate.hpp deleted file mode 100644 index 64a9fcd..0000000 --- a/modules/gpu/src/border_interpolate.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ -#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ - -#include "border_interpolate.hpp" - -namespace cv { namespace gpu { - - // Converts CPU border extrapolation mode into GPU internal analogue. - // Returns true if the GPU analogue exists, false otherwise. - bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType); - -}} - -#endif \ No newline at end of file diff --git a/modules/gpu/src/constantspacebp_gpu.cpp b/modules/gpu/src/constantspacebp.cpp similarity index 79% rename from modules/gpu/src/constantspacebp_gpu.cpp rename to modules/gpu/src/constantspacebp.cpp index a701288..29e0755 100644 --- a/modules/gpu/src/constantspacebp_gpu.cpp +++ b/modules/gpu/src/constantspacebp.cpp @@ -63,40 +63,29 @@ namespace cv { namespace gpu { namespace csbp void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, const DevMem2D& left, const DevMem2D& right, const DevMem2D& temp); - void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, - size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); + template + void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); - void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, - size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); + template + void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step1, size_t msg_step2, + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); - void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step1, size_t msg_step2, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); - void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step1, size_t msg_step2, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); + template + void init_message(T* u_new, T* d_new, T* l_new, T* r_new, + const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur, + T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, + T* data_cost_selected, const T* data_cost, size_t msg_step1, size_t msg_step2, + int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); - void init_message(short* u_new, short* d_new, short* l_new, short* r_new, - const short* u_cur, const short* d_cur, const short* l_cur, const short* r_cur, - short* selected_disp_pyr_new, const short* selected_disp_pyr_cur, - short* data_cost_selected, const short* data_cost, size_t msg_step1, size_t msg_step2, - int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); + template + void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected, + const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); - void init_message(float* u_new, float* d_new, float* l_new, float* r_new, - const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur, - float* selected_disp_pyr_new, const float* selected_disp_pyr_cur, - float* data_cost_selected, const float* data_cost, size_t msg_step1, size_t msg_step2, - int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); + template + void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, + const DevMem2D_& disp, int nr_plane, cudaStream_t stream); - void calc_all_iterations(short* u, short* d, short* l, short* r, short* data_cost_selected, - const short* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); - - void calc_all_iterations(float*u, float* d, float* l, float* r, float* data_cost_selected, - const float* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream); - - void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step, - DevMem2D_ disp, int nr_plane, cudaStream_t stream); - - void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step, - DevMem2D_ disp, int nr_plane, cudaStream_t stream); }}} namespace diff --git a/modules/gpu/src/cuda/beliefpropagation.cu b/modules/gpu/src/cuda/beliefpropagation.cu index 5ec9e87..175fad4 100644 --- a/modules/gpu/src/cuda/beliefpropagation.cu +++ b/modules/gpu/src/cuda/beliefpropagation.cu @@ -41,14 +41,17 @@ //M*/ #include "opencv2/gpu/devmem2d.hpp" -#include "saturate_cast.hpp" +#include "opencv2/gpu/device/saturate_cast.hpp" +#include "opencv2/gpu/device/limits_gpu.hpp" #include "safe_call.hpp" using namespace cv::gpu; +using namespace cv::gpu::device; -#ifndef FLT_MAX -#define FLT_MAX 3.402823466e+38F -#endif +#undef FLT_MAX +//#ifndef FLT_MAX +//#define FLT_MAX 3.402823466e+38F +//#endif namespace cv { namespace gpu { namespace bp { @@ -349,7 +352,7 @@ namespace cv { namespace gpu { namespace bp { template __device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step) { - float minimum = FLT_MAX; + float minimum = numeric_limits_gpu::max(); for(int i = 0; i < cndisp; ++i) { @@ -470,7 +473,7 @@ namespace cv { namespace gpu { namespace bp { size_t disp_step = rows * step; int best = 0; - float best_val = FLT_MAX; + float best_val = numeric_limits_gpu::max(); for (int d = 0; d < cndisp; ++d) { float val = us[d * disp_step]; diff --git a/modules/gpu/src/cuda/border_interpolate.hpp b/modules/gpu/src/cuda/border_interpolate.hpp deleted file mode 100644 index d15e11d..0000000 --- a/modules/gpu/src/cuda/border_interpolate.hpp +++ /dev/null @@ -1,179 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ -#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ - -#include "../internal_shared.hpp" - -namespace cv { namespace gpu { - - struct BrdReflect101 - { - BrdReflect101(int len): last(len - 1) {} - - __device__ int idx_low(int i) const - { - return abs(i); - } - - __device__ int idx_high(int i) const - { - return last - abs(last - i); - } - - __device__ int idx(int i) const - { - return abs(idx_high(i)); - } - - bool is_range_safe(int mini, int maxi) const - { - return -last <= mini && maxi <= 2 * last; - } - - int last; - }; - - - template - struct BrdRowReflect101: BrdReflect101 - { - BrdRowReflect101(int len): BrdReflect101(len) {} - - __device__ float at_low(int i, const T* data) const - { - return data[idx_low(i)]; - } - - __device__ float at_high(int i, const T* data) const - { - return data[idx_high(i)]; - } - }; - - - template - struct BrdColReflect101: BrdReflect101 - { - BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {} - - __device__ float at_low(int i, const T* data) const - { - return data[idx_low(i) * step]; - } - - __device__ float at_high(int i, const T* data) const - { - return data[idx_high(i) * step]; - } - - int step; - }; - - - struct BrdReplicate - { - BrdReplicate(int len): last(len - 1) {} - - __device__ int idx_low(int i) const - { - return max(i, 0); - } - - __device__ int idx_high(int i) const - { - return min(i, last); - } - - __device__ int idx(int i) const - { - return max(min(i, last), 0); - } - - bool is_range_safe(int mini, int maxi) const - { - return true; - } - - int last; - }; - - - template - struct BrdRowReplicate: BrdReplicate - { - BrdRowReplicate(int len): BrdReplicate(len) {} - - __device__ float at_low(int i, const T* data) const - { - return data[idx_low(i)]; - } - - __device__ float at_high(int i, const T* data) const - { - return data[idx_high(i)]; - } - }; - - - template - struct BrdColReplicate: BrdReplicate - { - BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {} - - __device__ float at_low(int i, const T* data) const - { - return data[idx_low(i) * step]; - } - - __device__ float at_high(int i, const T* data) const - { - return data[idx_high(i) * step]; - } - - int step; - }; - -}} - -#endif \ No newline at end of file diff --git a/modules/gpu/src/cuda/brute_force_matcher.cu b/modules/gpu/src/cuda/brute_force_matcher.cu index f42938f..b28aee1 100644 --- a/modules/gpu/src/cuda/brute_force_matcher.cu +++ b/modules/gpu/src/cuda/brute_force_matcher.cu @@ -40,8 +40,8 @@ // //M*/ -#include "cuda_shared.hpp" -#include "limits_gpu.hpp" +#include "internal_shared.hpp" +#include "opencv2/gpu/device/limits_gpu.hpp" using namespace cv::gpu; using namespace cv::gpu::device; @@ -51,9 +51,6 @@ namespace cv { namespace gpu { namespace bfmatcher /////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////// General funcs ////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// - - template struct StaticAssert; - template <> struct StaticAssert {static __host__ __device__ void check(){}}; /////////////////////////////////////////////////////////////////////////////// // Mask strategy diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu index 0ff8a17..00ebb91 100644 --- a/modules/gpu/src/cuda/color.cu +++ b/modules/gpu/src/cuda/color.cu @@ -40,18 +40,19 @@ // //M*/ -#include "cuda_shared.hpp" -#include "saturate_cast.hpp" -#include "vecmath.hpp" +#include "internal_shared.hpp" +#include "opencv2/gpu/device/saturate_cast.hpp" +#include "opencv2/gpu/device/vecmath.hpp" using namespace cv::gpu; +using namespace cv::gpu::device; #ifndef CV_DESCALE #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) #endif #ifndef FLT_EPSILON -#define FLT_EPSILON 1.192092896e-07F + #define FLT_EPSILON 1.192092896e-07F #endif namespace cv { namespace gpu { namespace color diff --git a/modules/gpu/src/cuda/constantspacebp.cu b/modules/gpu/src/cuda/constantspacebp.cu index 1287f80..b588b7a 100644 --- a/modules/gpu/src/cuda/constantspacebp.cu +++ b/modules/gpu/src/cuda/constantspacebp.cu @@ -41,31 +41,16 @@ //M*/ #include "opencv2/gpu/devmem2d.hpp" -#include "saturate_cast.hpp" +#include "opencv2/gpu/device/saturate_cast.hpp" +#include "opencv2/gpu/device/limits_gpu.hpp" #include "safe_call.hpp" using namespace cv::gpu; +using namespace cv::gpu::device; -#ifndef FLT_MAX -#define FLT_MAX 3.402823466e+30F -#endif - -#ifndef SHRT_MAX -#define SHRT_MAX 32767 -#endif namespace cv { namespace gpu { namespace csbp -{ - - template struct TypeLimits; - template <> struct TypeLimits - { - static __device__ short max() {return SHRT_MAX;} - }; - template <> struct TypeLimits - { - static __device__ float max() {return FLT_MAX;} - }; +{ /////////////////////////////////////////////////////////////// /////////////////////// load constants //////////////////////// @@ -150,7 +135,7 @@ namespace cv { namespace gpu { namespace csbp for(int i = 0; i < nr_plane; i++) { - T minimum = TypeLimits::max(); + T minimum = numeric_limits_gpu::max(); int id = 0; for(int d = 0; d < cndisp; d++) { @@ -164,7 +149,7 @@ namespace cv { namespace gpu { namespace csbp data_cost_selected[i * cdisp_step1] = minimum; selected_disparity[i * cdisp_step1] = id; - data_cost [id * cdisp_step1] = TypeLimits::max(); + data_cost [id * cdisp_step1] = numeric_limits_gpu::max(); } } } @@ -195,7 +180,7 @@ namespace cv { namespace gpu { namespace csbp data_cost_selected[nr_local_minimum * cdisp_step1] = cur; selected_disparity[nr_local_minimum * cdisp_step1] = d; - data_cost[d * cdisp_step1] = TypeLimits::max(); + data_cost[d * cdisp_step1] = numeric_limits_gpu::max(); nr_local_minimum++; } @@ -206,7 +191,7 @@ namespace cv { namespace gpu { namespace csbp for (int i = nr_local_minimum; i < nr_plane; i++) { - T minimum = TypeLimits::max(); + T minimum = numeric_limits_gpu::max(); int id = 0; for (int d = 0; d < cndisp; d++) @@ -221,7 +206,7 @@ namespace cv { namespace gpu { namespace csbp data_cost_selected[i * cdisp_step1] = minimum; selected_disparity[i * cdisp_step1] = id; - data_cost[id * cdisp_step1] = TypeLimits::max(); + data_cost[id * cdisp_step1] = numeric_limits_gpu::max(); } } } @@ -365,7 +350,7 @@ namespace cv { namespace gpu { namespace csbp } template - void init_data_cost_tmpl(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, + void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream) { @@ -400,17 +385,11 @@ namespace cv { namespace gpu { namespace csbp cudaSafeCall( cudaThreadSynchronize() ); } - void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, - size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream) - { - init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, use_local_init_data_cost, stream); - } + template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, + int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); - void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, - size_t msg_step, int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream) - { - init_data_cost_tmpl(rows, cols, disp_selected_pyr, data_cost_selected, msg_step, h, w, level, nr_plane, ndisp, channels, use_local_init_data_cost, stream); - } + template void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step, + int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream); /////////////////////////////////////////////////////////////// ////////////////////// compute data cost ////////////////////// @@ -562,7 +541,7 @@ namespace cv { namespace gpu { namespace csbp } template - void compute_data_cost_tmpl(const T* disp_selected_pyr, T* data_cost, size_t msg_step1, size_t msg_step2, + void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step1, size_t msg_step2, int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream) { typedef void (*ComputeDataCostCaller)(const T* disp_selected_pyr, T* data_cost, int rows, int cols, @@ -588,16 +567,12 @@ namespace cv { namespace gpu { namespace csbp cudaSafeCall( cudaThreadSynchronize() ); } - void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step1, size_t msg_step2, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream) - { - compute_data_cost_tmpl(disp_selected_pyr, data_cost, msg_step1, msg_step2, rows, cols, h, w, h2, level, nr_plane, channels, stream); - } - void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step1, size_t msg_step2, - int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream) - { - compute_data_cost_tmpl(disp_selected_pyr, data_cost, msg_step1, msg_step2, rows, cols, h, w, h2, level, nr_plane, channels, stream); - } + template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step1, size_t msg_step2, + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); + + template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step1, size_t msg_step2, + int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream); + /////////////////////////////////////////////////////////////// //////////////////////// init message ///////////////////////// @@ -613,7 +588,7 @@ namespace cv { namespace gpu { namespace csbp { for(int i = 0; i < nr_plane; i++) { - T minimum = TypeLimits::max(); + T minimum = numeric_limits_gpu::max(); int id = 0; for(int j = 0; j < nr_plane2; j++) { @@ -633,7 +608,7 @@ namespace cv { namespace gpu { namespace csbp l_new[i * cdisp_step1] = l_cur[id * cdisp_step2]; r_new[i * cdisp_step1] = r_cur[id * cdisp_step2]; - data_cost_new[id * cdisp_step1] = TypeLimits::max(); + data_cost_new[id * cdisp_step1] = numeric_limits_gpu::max(); } } @@ -688,7 +663,7 @@ namespace cv { namespace gpu { namespace csbp template - void init_message_tmpl(T* u_new, T* d_new, T* l_new, T* r_new, + void init_message(T* u_new, T* d_new, T* l_new, T* r_new, const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur, T* selected_disp_pyr_new, const T* selected_disp_pyr_cur, T* data_cost_selected, const T* data_cost, size_t msg_step1, size_t msg_step2, @@ -718,27 +693,18 @@ namespace cv { namespace gpu { namespace csbp cudaSafeCall( cudaThreadSynchronize() ); } - void init_message(short* u_new, short* d_new, short* l_new, short* r_new, + + template void init_message(short* u_new, short* d_new, short* l_new, short* r_new, const short* u_cur, const short* d_cur, const short* l_cur, const short* r_cur, short* selected_disp_pyr_new, const short* selected_disp_pyr_cur, short* data_cost_selected, const short* data_cost, size_t msg_step1, size_t msg_step2, - int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream) - { - init_message_tmpl(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur, - selected_disp_pyr_new, selected_disp_pyr_cur, data_cost_selected, data_cost, msg_step1, msg_step2, - h, w, nr_plane, h2, w2, nr_plane2, stream); - } + int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); - void init_message(float* u_new, float* d_new, float* l_new, float* r_new, + template void init_message(float* u_new, float* d_new, float* l_new, float* r_new, const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur, float* selected_disp_pyr_new, const float* selected_disp_pyr_cur, float* data_cost_selected, const float* data_cost, size_t msg_step1, size_t msg_step2, - int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream) - { - init_message_tmpl(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur, - selected_disp_pyr_new, selected_disp_pyr_cur, data_cost_selected, data_cost, msg_step1, msg_step2, - h, w, nr_plane, h2, w2, nr_plane2, stream); - } + int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream); /////////////////////////////////////////////////////////////// //////////////////// calc all iterations ///////////////////// @@ -748,7 +714,7 @@ namespace cv { namespace gpu { namespace csbp __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3, const T* dst_disp, const T* src_disp, int nr_plane, T* temp) { - T minimum = TypeLimits::max(); + T minimum = numeric_limits_gpu::max(); for(int d = 0; d < nr_plane; d++) { @@ -807,7 +773,7 @@ namespace cv { namespace gpu { namespace csbp template - void calc_all_iterations_tmpl(T* u, T* d, T* l, T* r, const T* data_cost_selected, + void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected, const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream) { size_t disp_step = msg_step * h; @@ -828,18 +794,12 @@ namespace cv { namespace gpu { namespace csbp cudaSafeCall( cudaThreadSynchronize() ); } }; + + template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step, + int h, int w, int nr_plane, int iters, cudaStream_t stream); - void calc_all_iterations(short* u, short* d, short* l, short* r, short* data_cost_selected, - const short* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream) - { - calc_all_iterations_tmpl(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, msg_step, h, w, nr_plane, iters, stream); - } - - void calc_all_iterations(float*u, float* d, float* l, float* r, float* data_cost_selected, - const float* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream) - { - calc_all_iterations_tmpl(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, msg_step, h, w, nr_plane, iters, stream); - } + template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step, + int h, int w, int nr_plane, int iters, cudaStream_t stream); /////////////////////////////////////////////////////////////// @@ -866,7 +826,7 @@ namespace cv { namespace gpu { namespace csbp const T* r = r_ + (y+0) * cmsg_step1 + (x-1); int best = 0; - T best_val = TypeLimits::max(); + T best_val = numeric_limits_gpu::max(); for (int i = 0; i < nr_plane; ++i) { int idx = i * cdisp_step1; @@ -882,9 +842,8 @@ namespace cv { namespace gpu { namespace csbp } } - template - void compute_disp_tmpl(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, + void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step, const DevMem2D_& disp, int nr_plane, cudaStream_t stream) { size_t disp_step = disp.rows * msg_step; @@ -903,16 +862,9 @@ namespace cv { namespace gpu { namespace csbp cudaSafeCall( cudaThreadSynchronize() ); } - void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step, - DevMem2D_ disp, int nr_plane, cudaStream_t stream) - { - compute_disp_tmpl(u, d, l, r, data_cost_selected, disp_selected, msg_step, disp, nr_plane, stream); - } - - void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step, - DevMem2D_ disp, int nr_plane, cudaStream_t stream) - { - compute_disp_tmpl(u, d, l, r, data_cost_selected, disp_selected, msg_step, disp, nr_plane, stream); - } + template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step, + const DevMem2D_& disp, int nr_plane, cudaStream_t stream); + template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step, + const DevMem2D_& disp, int nr_plane, cudaStream_t stream); }}} diff --git a/modules/gpu/src/cuda/filters.cu b/modules/gpu/src/cuda/filters.cu index 6749fe6..d725d0d 100644 --- a/modules/gpu/src/cuda/filters.cu +++ b/modules/gpu/src/cuda/filters.cu @@ -41,12 +41,14 @@ //M*/ #include "opencv2/gpu/devmem2d.hpp" -#include "saturate_cast.hpp" +#include "opencv2/gpu/device/saturate_cast.hpp" +#include "opencv2/gpu/device/vecmath.hpp" + #include "safe_call.hpp" -#include "cuda_shared.hpp" -#include "vecmath.hpp" +#include "internal_shared.hpp" using namespace cv::gpu; +using namespace cv::gpu::device; #ifndef FLT_MAX #define FLT_MAX 3.402823466e+30F diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index ab9df03..3e5ddf5 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -40,7 +40,7 @@ // //M*/ -#include "cuda_shared.hpp" +#include "internal_shared.hpp" #ifndef CV_PI_F #ifndef CV_PI diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index f85e03f..8408182 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -40,10 +40,12 @@ // //M*/ -#include "cuda_shared.hpp" -#include "border_interpolate.hpp" +#include "internal_shared.hpp" +#include "opencv2/gpu/device/border_interpolate.hpp" +#include "internal_shared.hpp" using namespace cv::gpu; +using namespace cv::gpu::device; /////////////////////////////////// Remap /////////////////////////////////////////////// namespace cv { namespace gpu { namespace imgproc @@ -584,11 +586,11 @@ namespace cv { namespace gpu { namespace imgproc switch (border_type) { - case BORDER_REFLECT101: + case BORDER_REFLECT101_GPU: cornerHarris_kernel<<>>( cols, rows, block_size, k, dst, BrdReflect101(cols), BrdReflect101(rows)); break; - case BORDER_REPLICATE: + case BORDER_REPLICATE_GPU: harrisDxTex.addressMode[0] = cudaAddressModeClamp; harrisDxTex.addressMode[1] = cudaAddressModeClamp; harrisDyTex.addressMode[0] = cudaAddressModeClamp; @@ -698,11 +700,11 @@ namespace cv { namespace gpu { namespace imgproc switch (border_type) { - case BORDER_REFLECT101: + case BORDER_REFLECT101_GPU: cornerMinEigenVal_kernel<<>>( cols, rows, block_size, dst, BrdReflect101(cols), BrdReflect101(rows)); break; - case BORDER_REPLICATE: + case BORDER_REPLICATE_GPU: minEigenValDxTex.addressMode[0] = cudaAddressModeClamp; minEigenValDxTex.addressMode[1] = cudaAddressModeClamp; minEigenValDyTex.addressMode[0] = cudaAddressModeClamp; diff --git a/modules/gpu/src/cuda/cuda_shared.hpp b/modules/gpu/src/cuda/internal_shared.hpp similarity index 87% rename from modules/gpu/src/cuda/cuda_shared.hpp rename to modules/gpu/src/cuda/internal_shared.hpp index 600fb08..e52ba4e 100644 --- a/modules/gpu/src/cuda/cuda_shared.hpp +++ b/modules/gpu/src/cuda/internal_shared.hpp @@ -40,8 +40,8 @@ // //M*/ -#ifndef __OPENCV_CUDA_SHARED_HPP__ -#define __OPENCV_CUDA_SHARED_HPP__ +#ifndef __OPENCV_internal_shared_HPP__ +#define __OPENCV_internal_shared_HPP__ #include "opencv2/gpu/devmem2d.hpp" #include "safe_call.hpp" @@ -54,7 +54,18 @@ namespace cv typedef unsigned char uchar; typedef signed char schar; typedef unsigned short ushort; - typedef unsigned int uint; + typedef unsigned int uint; + + enum + { + BORDER_REFLECT101_GPU = 0, + BORDER_REPLICATE_GPU + }; + + // Converts CPU border extrapolation mode into GPU internal analogue. + // Returns true if the GPU analogue exists, false otherwise. + bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType); + static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; } @@ -99,4 +110,4 @@ namespace cv } -#endif /* __OPENCV_CUDA_SHARED_HPP__ */ +#endif /* __OPENCV_internal_shared_HPP__ */ diff --git a/modules/gpu/src/cuda/linear_filters_beta.cu b/modules/gpu/src/cuda/linear_filters_beta.cu index 0841618..5a5184c 100644 --- a/modules/gpu/src/cuda/linear_filters_beta.cu +++ b/modules/gpu/src/cuda/linear_filters_beta.cu @@ -41,16 +41,16 @@ //M*/ #include "opencv2/gpu/devmem2d.hpp" +#include "opencv2/gpu/device/border_interpolate.hpp" #include "safe_call.hpp" -#include "cuda_shared.hpp" -#include "border_interpolate.hpp" +#include "internal_shared.hpp" #define BLOCK_DIM_X 16 #define BLOCK_DIM_Y 16 #define MAX_KERNEL_SIZE 16 using namespace cv::gpu; - +using namespace cv::gpu::device; namespace cv { namespace gpu { namespace linear_filters { diff --git a/modules/gpu/src/cuda/match_template.cu b/modules/gpu/src/cuda/match_template.cu index ea797f0..42c8951 100644 --- a/modules/gpu/src/cuda/match_template.cu +++ b/modules/gpu/src/cuda/match_template.cu @@ -40,7 +40,7 @@ // //M*/ -#include "cuda_shared.hpp" +#include "internal_shared.hpp" using namespace cv::gpu; diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index c088058..71b70d3 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -40,10 +40,10 @@ // //M*/ -#include "cuda_shared.hpp" +#include "opencv2/gpu/device/limits_gpu.hpp" +#include "opencv2/gpu/device/saturate_cast.hpp" #include "transform.hpp" -#include "limits_gpu.hpp" -#include "saturate_cast.hpp" +#include "internal_shared.hpp" using namespace cv::gpu; using namespace cv::gpu::device; diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/gpu/src/cuda/matrix_operations.cu index 74d5c58..42e55b3 100644 --- a/modules/gpu/src/cuda/matrix_operations.cu +++ b/modules/gpu/src/cuda/matrix_operations.cu @@ -40,8 +40,10 @@ // //M*/ -#include "cuda_shared.hpp" -#include "saturate_cast.hpp" +#include "internal_shared.hpp" +#include "opencv2/gpu/device/saturate_cast.hpp" + +using namespace cv::gpu::device; namespace cv { namespace gpu { namespace matrix_operations { diff --git a/modules/gpu/src/cuda/saturate_cast.hpp b/modules/gpu/src/cuda/saturate_cast.hpp deleted file mode 100644 index 5de7ca6..0000000 --- a/modules/gpu/src/cuda/saturate_cast.hpp +++ /dev/null @@ -1,169 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_GPU_SATURATE_CAST_HPP__ -#define __OPENCV_GPU_SATURATE_CAST_HPP__ - -#include "cuda_shared.hpp" - -namespace cv -{ - namespace gpu - { - template static __device__ _Tp saturate_cast(uchar v) { return _Tp(v); } - template static __device__ _Tp saturate_cast(schar v) { return _Tp(v); } - template static __device__ _Tp saturate_cast(ushort v) { return _Tp(v); } - template static __device__ _Tp saturate_cast(short v) { return _Tp(v); } - template static __device__ _Tp saturate_cast(uint v) { return _Tp(v); } - template static __device__ _Tp saturate_cast(int v) { return _Tp(v); } - template static __device__ _Tp saturate_cast(float v) { return _Tp(v); } - template static __device__ _Tp saturate_cast(double v) { return _Tp(v); } - - template<> static __device__ uchar saturate_cast(schar v) - { return (uchar)max((int)v, 0); } - template<> static __device__ uchar saturate_cast(ushort v) - { return (uchar)min((uint)v, (uint)UCHAR_MAX); } - template<> static __device__ uchar saturate_cast(int v) - { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } - template<> static __device__ uchar saturate_cast(uint v) - { return (uchar)min(v, (uint)UCHAR_MAX); } - template<> static __device__ uchar saturate_cast(short v) - { return saturate_cast((uint)v); } - - template<> static __device__ uchar saturate_cast(float v) - { int iv = __float2int_rn(v); return saturate_cast(iv); } - template<> static __device__ uchar saturate_cast(double v) - { - #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 - int iv = __double2int_rn(v); return saturate_cast(iv); - #else - return saturate_cast((float)v); - #endif - } - - template<> static __device__ schar saturate_cast(uchar v) - { return (schar)min((int)v, SCHAR_MAX); } - template<> static __device__ schar saturate_cast(ushort v) - { return (schar)min((uint)v, (uint)SCHAR_MAX); } - template<> static __device__ schar saturate_cast(int v) - { - return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ? - v : v > 0 ? SCHAR_MAX : SCHAR_MIN); - } - template<> static __device__ schar saturate_cast(short v) - { return saturate_cast((int)v); } - template<> static __device__ schar saturate_cast(uint v) - { return (schar)min(v, (uint)SCHAR_MAX); } - - template<> static __device__ schar saturate_cast(float v) - { int iv = __float2int_rn(v); return saturate_cast(iv); } - template<> static __device__ schar saturate_cast(double v) - { - #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 - int iv = __double2int_rn(v); return saturate_cast(iv); - #else - return saturate_cast((float)v); - #endif - } - - template<> static __device__ ushort saturate_cast(schar v) - { return (ushort)max((int)v, 0); } - template<> static __device__ ushort saturate_cast(short v) - { return (ushort)max((int)v, 0); } - template<> static __device__ ushort saturate_cast(int v) - { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } - template<> static __device__ ushort saturate_cast(uint v) - { return (ushort)min(v, (uint)USHRT_MAX); } - template<> static __device__ ushort saturate_cast(float v) - { int iv = __float2int_rn(v); return saturate_cast(iv); } - template<> static __device__ ushort saturate_cast(double v) - { - #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 - int iv = __double2int_rn(v); return saturate_cast(iv); - #else - return saturate_cast((float)v); - #endif - } - - template<> static __device__ short saturate_cast(ushort v) - { return (short)min((int)v, SHRT_MAX); } - template<> static __device__ short saturate_cast(int v) - { - return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? - v : v > 0 ? SHRT_MAX : SHRT_MIN); - } - template<> static __device__ short saturate_cast(uint v) - { return (short)min(v, (uint)SHRT_MAX); } - template<> static __device__ short saturate_cast(float v) - { int iv = __float2int_rn(v); return saturate_cast(iv); } - template<> static __device__ short saturate_cast(double v) - { - #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 - int iv = __double2int_rn(v); return saturate_cast(iv); - #else - return saturate_cast((float)v); - #endif - } - - template<> static __device__ int saturate_cast(float v) { return __float2int_rn(v); } - template<> static __device__ int saturate_cast(double v) - { - #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 - return __double2int_rn(v); - #else - return saturate_cast((float)v); - #endif - } - - template<> static __device__ uint saturate_cast(float v){ return __float2uint_rn(v); } - template<> static __device__ uint saturate_cast(double v) - { - #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 - return __double2uint_rn(v); - #else - return saturate_cast((float)v); - #endif - } - } -} - -#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/src/cuda/split_merge.cu b/modules/gpu/src/cuda/split_merge.cu index ab22186..40d2889 100644 --- a/modules/gpu/src/cuda/split_merge.cu +++ b/modules/gpu/src/cuda/split_merge.cu @@ -41,7 +41,7 @@ //M*/ #include "opencv2/gpu/devmem2d.hpp" -#include "cuda_shared.hpp" +#include "internal_shared.hpp" namespace cv { namespace gpu { namespace split_merge { diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu index 80d457f..c385691 100644 --- a/modules/gpu/src/cuda/stereobm.cu +++ b/modules/gpu/src/cuda/stereobm.cu @@ -40,7 +40,7 @@ // //M*/ -//#include "cuda_shared.hpp" +//#include "internal_shared.hpp" #include "opencv2/gpu/devmem2d.hpp" #include "safe_call.hpp" static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; } diff --git a/modules/gpu/src/cuda/transform.hpp b/modules/gpu/src/cuda/transform.hpp index c50d8d1..b8f066e 100644 --- a/modules/gpu/src/cuda/transform.hpp +++ b/modules/gpu/src/cuda/transform.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_TRANSFORM_HPP__ #define __OPENCV_GPU_TRANSFORM_HPP__ -#include "cuda_shared.hpp" +#include "internal_shared.hpp" namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/cuda/vecmath.hpp b/modules/gpu/src/cuda/vecmath.hpp deleted file mode 100644 index ff42383..0000000 --- a/modules/gpu/src/cuda/vecmath.hpp +++ /dev/null @@ -1,936 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_GPU_VECMATH_HPP__ -#define __OPENCV_GPU_VECMATH_HPP__ - -#include "cuda_shared.hpp" -#include "saturate_cast.hpp" - -namespace cv -{ - namespace gpu - { - template struct TypeVec; - - template<> struct TypeVec { typedef uchar vec_t; }; - template<> struct TypeVec { typedef uchar1 vec_t; }; - template<> struct TypeVec { typedef uchar2 vec_t; }; - template<> struct TypeVec { typedef uchar2 vec_t; }; - template<> struct TypeVec { typedef uchar3 vec_t; }; - template<> struct TypeVec { typedef uchar3 vec_t; }; - template<> struct TypeVec { typedef uchar4 vec_t; }; - template<> struct TypeVec { typedef uchar4 vec_t; }; - - template<> struct TypeVec { typedef char vec_t; }; - template<> struct TypeVec { typedef char1 vec_t; }; - template<> struct TypeVec { typedef char2 vec_t; }; - template<> struct TypeVec { typedef char2 vec_t; }; - template<> struct TypeVec { typedef char3 vec_t; }; - template<> struct TypeVec { typedef char3 vec_t; }; - template<> struct TypeVec { typedef char4 vec_t; }; - template<> struct TypeVec { typedef char4 vec_t; }; - - template<> struct TypeVec { typedef ushort vec_t; }; - template<> struct TypeVec { typedef ushort1 vec_t; }; - template<> struct TypeVec { typedef ushort2 vec_t; }; - template<> struct TypeVec { typedef ushort2 vec_t; }; - template<> struct TypeVec { typedef ushort3 vec_t; }; - template<> struct TypeVec { typedef ushort3 vec_t; }; - template<> struct TypeVec { typedef ushort4 vec_t; }; - template<> struct TypeVec { typedef ushort4 vec_t; }; - - template<> struct TypeVec { typedef short vec_t; }; - template<> struct TypeVec { typedef short1 vec_t; }; - template<> struct TypeVec { typedef short2 vec_t; }; - template<> struct TypeVec { typedef short2 vec_t; }; - template<> struct TypeVec { typedef short3 vec_t; }; - template<> struct TypeVec { typedef short3 vec_t; }; - template<> struct TypeVec { typedef short4 vec_t; }; - template<> struct TypeVec { typedef short4 vec_t; }; - - template<> struct TypeVec { typedef uint vec_t; }; - template<> struct TypeVec { typedef uint1 vec_t; }; - template<> struct TypeVec { typedef uint2 vec_t; }; - template<> struct TypeVec { typedef uint2 vec_t; }; - template<> struct TypeVec { typedef uint3 vec_t; }; - template<> struct TypeVec { typedef uint3 vec_t; }; - template<> struct TypeVec { typedef uint4 vec_t; }; - template<> struct TypeVec { typedef uint4 vec_t; }; - - template<> struct TypeVec { typedef int vec_t; }; - template<> struct TypeVec { typedef int1 vec_t; }; - template<> struct TypeVec { typedef int2 vec_t; }; - template<> struct TypeVec { typedef int2 vec_t; }; - template<> struct TypeVec { typedef int3 vec_t; }; - template<> struct TypeVec { typedef int3 vec_t; }; - template<> struct TypeVec { typedef int4 vec_t; }; - template<> struct TypeVec { typedef int4 vec_t; }; - - template<> struct TypeVec { typedef float vec_t; }; - template<> struct TypeVec { typedef float1 vec_t; }; - template<> struct TypeVec { typedef float2 vec_t; }; - template<> struct TypeVec { typedef float2 vec_t; }; - template<> struct TypeVec { typedef float3 vec_t; }; - template<> struct TypeVec { typedef float3 vec_t; }; - template<> struct TypeVec { typedef float4 vec_t; }; - template<> struct TypeVec { typedef float4 vec_t; }; - - template struct VecTraits; - - template<> struct VecTraits - { - typedef uchar elem_t; - enum {cn=1}; - static __device__ uchar all(uchar v) {return v;} - }; - template<> struct VecTraits - { - typedef uchar elem_t; - enum {cn=1}; - static __device__ uchar1 all(uchar v) {return make_uchar1(v);} - }; - template<> struct VecTraits - { - typedef uchar elem_t; - enum {cn=2}; - static __device__ uchar2 all(uchar v) {return make_uchar2(v, v);} - }; - template<> struct VecTraits - { - typedef uchar elem_t; - enum {cn=3}; - static __device__ uchar3 all(uchar v) {return make_uchar3(v, v, v);} - }; - template<> struct VecTraits - { - typedef uchar elem_t; - enum {cn=4}; - static __device__ uchar4 all(uchar v) {return make_uchar4(v, v, v, v);} - }; - - template<> struct VecTraits - { - typedef char elem_t; - enum {cn=1}; - static __device__ char all(char v) {return v;} - }; - template<> struct VecTraits - { - typedef char elem_t; - enum {cn=1}; - static __device__ char1 all(char v) {return make_char1(v);} - }; - template<> struct VecTraits - { - typedef char elem_t; - enum {cn=2}; - static __device__ char2 all(char v) {return make_char2(v, v);} - }; - template<> struct VecTraits - { - typedef char elem_t; - enum {cn=3}; - static __device__ char3 all(char v) {return make_char3(v, v, v);} - }; - template<> struct VecTraits - { - typedef char elem_t; - enum {cn=4}; - static __device__ char4 all(char v) {return make_char4(v, v, v, v);} - }; - - template<> struct VecTraits - { - typedef ushort elem_t; - enum {cn=1}; - static __device__ ushort all(ushort v) {return v;} - }; - template<> struct VecTraits - { - typedef ushort elem_t; - enum {cn=1}; - static __device__ ushort1 all(ushort v) {return make_ushort1(v);} - }; - template<> struct VecTraits - { - typedef ushort elem_t; - enum {cn=2}; - static __device__ ushort2 all(ushort v) {return make_ushort2(v, v);} - }; - template<> struct VecTraits - { - typedef ushort elem_t; - enum {cn=3}; - static __device__ ushort3 all(ushort v) {return make_ushort3(v, v, v);} - }; - template<> struct VecTraits - { - typedef ushort elem_t; - enum {cn=4}; - static __device__ ushort4 all(ushort v) {return make_ushort4(v, v, v, v);} - }; - - template<> struct VecTraits - { - typedef short elem_t; - enum {cn=1}; - static __device__ short all(short v) {return v;} - }; - template<> struct VecTraits - { - typedef short elem_t; - enum {cn=1}; - static __device__ short1 all(short v) {return make_short1(v);} - }; - template<> struct VecTraits - { - typedef short elem_t; - enum {cn=2}; - static __device__ short2 all(short v) {return make_short2(v, v);} - }; - template<> struct VecTraits - { - typedef short elem_t; - enum {cn=3}; - static __device__ short3 all(short v) {return make_short3(v, v, v);} - }; - template<> struct VecTraits - { - typedef short elem_t; - enum {cn=4}; - static __device__ short4 all(short v) {return make_short4(v, v, v, v);} - }; - - template<> struct VecTraits - { - typedef uint elem_t; - enum {cn=1}; - static __device__ uint all(uint v) {return v;} - }; - template<> struct VecTraits - { - typedef uint elem_t; - enum {cn=1}; - static __device__ uint1 all(uint v) {return make_uint1(v);} - }; - template<> struct VecTraits - { - typedef uint elem_t; - enum {cn=2}; - static __device__ uint2 all(uint v) {return make_uint2(v, v);} - }; - template<> struct VecTraits - { - typedef uint elem_t; - enum {cn=3}; - static __device__ uint3 all(uint v) {return make_uint3(v, v, v);} - }; - template<> struct VecTraits - { - typedef uint elem_t; - enum {cn=4}; - static __device__ uint4 all(uint v) {return make_uint4(v, v, v, v);} - }; - - template<> struct VecTraits - { - typedef int elem_t; - enum {cn=1}; - static __device__ int all(int v) {return v;} - }; - template<> struct VecTraits - { - typedef int elem_t; - enum {cn=1}; - static __device__ int1 all(int v) {return make_int1(v);} - }; - template<> struct VecTraits - { - typedef int elem_t; - enum {cn=2}; - static __device__ int2 all(int v) {return make_int2(v, v);} - }; - template<> struct VecTraits - { - typedef int elem_t; - enum {cn=3}; - static __device__ int3 all(int v) {return make_int3(v, v, v);} - }; - template<> struct VecTraits - { - typedef int elem_t; - enum {cn=4}; - static __device__ int4 all(int v) {return make_int4(v, v, v, v);} - }; - - template<> struct VecTraits - { - typedef float elem_t; - enum {cn=1}; - static __device__ float all(float v) {return v;} - }; - template<> struct VecTraits - { - typedef float elem_t; - enum {cn=1}; - static __device__ float1 all(float v) {return make_float1(v);} - }; - template<> struct VecTraits - { - typedef float elem_t; - enum {cn=2}; - static __device__ float2 all(float v) {return make_float2(v, v);} - }; - template<> struct VecTraits - { - typedef float elem_t; - enum {cn=3}; - static __device__ float3 all(float v) {return make_float3(v, v, v);} - }; - template<> struct VecTraits - { - typedef float elem_t; - enum {cn=4}; - static __device__ float4 all(float v) {return make_float4(v, v, v, v);} - }; - - template struct SatCast; - template struct SatCast<1, VecD> - { - template - __device__ VecD operator()(const VecS& v) - { - VecD res; - res.x = saturate_cast< VecTraits::elem_t >(v.x); - return res; - } - }; - template struct SatCast<2, VecD> - { - template - __device__ VecD operator()(const VecS& v) - { - VecD res; - res.x = saturate_cast< VecTraits::elem_t >(v.x); - res.y = saturate_cast< VecTraits::elem_t >(v.y); - return res; - } - }; - template struct SatCast<3, VecD> - { - template - __device__ VecD operator()(const VecS& v) - { - VecD res; - res.x = saturate_cast< VecTraits::elem_t >(v.x); - res.y = saturate_cast< VecTraits::elem_t >(v.y); - res.y = saturate_cast< VecTraits::elem_t >(v.z); - return res; - } - }; - template struct SatCast<4, VecD> - { - template - __device__ VecD operator()(const VecS& v) - { - VecD res; - res.x = saturate_cast< VecTraits::elem_t >(v.x); - res.y = saturate_cast< VecTraits::elem_t >(v.y); - res.y = saturate_cast< VecTraits::elem_t >(v.z); - res.w = saturate_cast< VecTraits::elem_t >(v.w); - return res; - } - }; - - template static __device__ VecD saturate_cast_caller(const VecS& v) - { - SatCast< - - VecTraits::cn, - - VecD - > - - cast; - return cast(v); - } - - template static __device__ _Tp saturate_cast(const uchar1& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const char1& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const ushort1& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const short1& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const uint1& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const int1& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const float1& v) {return saturate_cast_caller<_Tp>(v);} - - template static __device__ _Tp saturate_cast(const uchar2& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const char2& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const ushort2& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const short2& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const uint2& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const int2& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const float2& v) {return saturate_cast_caller<_Tp>(v);} - - template static __device__ _Tp saturate_cast(const uchar3& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const char3& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const ushort3& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const short3& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const uint3& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const int3& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const float3& v) {return saturate_cast_caller<_Tp>(v);} - - template static __device__ _Tp saturate_cast(const uchar4& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const char4& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const ushort4& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const short4& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const uint4& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const int4& v) {return saturate_cast_caller<_Tp>(v);} - template static __device__ _Tp saturate_cast(const float4& v) {return saturate_cast_caller<_Tp>(v);} - - static __device__ uchar1 operator+(const uchar1& a, const uchar1& b) - { - return make_uchar1(a.x + b.x); - } - static __device__ uchar1 operator-(const uchar1& a, const uchar1& b) - { - return make_uchar1(a.x - b.x); - } - static __device__ uchar1 operator*(const uchar1& a, const uchar1& b) - { - return make_uchar1(a.x * b.x); - } - static __device__ uchar1 operator/(const uchar1& a, const uchar1& b) - { - return make_uchar1(a.x / b.x); - } - static __device__ float1 operator*(const uchar1& a, float s) - { - return make_float1(a.x * s); - } - - static __device__ uchar2 operator+(const uchar2& a, const uchar2& b) - { - return make_uchar2(a.x + b.x, a.y + b.y); - } - static __device__ uchar2 operator-(const uchar2& a, const uchar2& b) - { - return make_uchar2(a.x - b.x, a.y - b.y); - } - static __device__ uchar2 operator*(const uchar2& a, const uchar2& b) - { - return make_uchar2(a.x * b.x, a.y * b.y); - } - static __device__ uchar2 operator/(const uchar2& a, const uchar2& b) - { - return make_uchar2(a.x / b.x, a.y / b.y); - } - static __device__ float2 operator*(const uchar2& a, float s) - { - return make_float2(a.x * s, a.y * s); - } - - static __device__ uchar3 operator+(const uchar3& a, const uchar3& b) - { - return make_uchar3(a.x + b.x, a.y + b.y, a.z + b.z); - } - static __device__ uchar3 operator-(const uchar3& a, const uchar3& b) - { - return make_uchar3(a.x - b.x, a.y - b.y, a.z - b.z); - } - static __device__ uchar3 operator*(const uchar3& a, const uchar3& b) - { - return make_uchar3(a.x * b.x, a.y * b.y, a.z * b.z); - } - static __device__ uchar3 operator/(const uchar3& a, const uchar3& b) - { - return make_uchar3(a.x / b.x, a.y / b.y, a.z / b.z); - } - static __device__ float3 operator*(const uchar3& a, float s) - { - return make_float3(a.x * s, a.y * s, a.z * s); - } - - static __device__ uchar4 operator+(const uchar4& a, const uchar4& b) - { - return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - static __device__ uchar4 operator-(const uchar4& a, const uchar4& b) - { - return make_uchar4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - } - static __device__ uchar4 operator*(const uchar4& a, const uchar4& b) - { - return make_uchar4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - } - static __device__ uchar4 operator/(const uchar4& a, const uchar4& b) - { - return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - } - static __device__ float4 operator*(const uchar4& a, float s) - { - return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); - } - - static __device__ char1 operator+(const char1& a, const char1& b) - { - return make_char1(a.x + b.x); - } - static __device__ char1 operator-(const char1& a, const char1& b) - { - return make_char1(a.x - b.x); - } - static __device__ char1 operator*(const char1& a, const char1& b) - { - return make_char1(a.x * b.x); - } - static __device__ char1 operator/(const char1& a, const char1& b) - { - return make_char1(a.x / b.x); - } - static __device__ float1 operator*(const char1& a, float s) - { - return make_float1(a.x * s); - } - - static __device__ char2 operator+(const char2& a, const char2& b) - { - return make_char2(a.x + b.x, a.y + b.y); - } - static __device__ char2 operator-(const char2& a, const char2& b) - { - return make_char2(a.x - b.x, a.y - b.y); - } - static __device__ char2 operator*(const char2& a, const char2& b) - { - return make_char2(a.x * b.x, a.y * b.y); - } - static __device__ char2 operator/(const char2& a, const char2& b) - { - return make_char2(a.x / b.x, a.y / b.y); - } - static __device__ float2 operator*(const char2& a, float s) - { - return make_float2(a.x * s, a.y * s); - } - - static __device__ char3 operator+(const char3& a, const char3& b) - { - return make_char3(a.x + b.x, a.y + b.y, a.z + b.z); - } - static __device__ char3 operator-(const char3& a, const char3& b) - { - return make_char3(a.x - b.x, a.y - b.y, a.z - b.z); - } - static __device__ char3 operator*(const char3& a, const char3& b) - { - return make_char3(a.x * b.x, a.y * b.y, a.z * b.z); - } - static __device__ char3 operator/(const char3& a, const char3& b) - { - return make_char3(a.x / b.x, a.y / b.y, a.z / b.z); - } - static __device__ float3 operator*(const char3& a, float s) - { - return make_float3(a.x * s, a.y * s, a.z * s); - } - - static __device__ char4 operator+(const char4& a, const char4& b) - { - return make_char4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - static __device__ char4 operator-(const char4& a, const char4& b) - { - return make_char4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - } - static __device__ char4 operator*(const char4& a, const char4& b) - { - return make_char4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - } - static __device__ char4 operator/(const char4& a, const char4& b) - { - return make_char4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - } - static __device__ float4 operator*(const char4& a, float s) - { - return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); - } - - static __device__ ushort1 operator+(const ushort1& a, const ushort1& b) - { - return make_ushort1(a.x + b.x); - } - static __device__ ushort1 operator-(const ushort1& a, const ushort1& b) - { - return make_ushort1(a.x - b.x); - } - static __device__ ushort1 operator*(const ushort1& a, const ushort1& b) - { - return make_ushort1(a.x * b.x); - } - static __device__ ushort1 operator/(const ushort1& a, const ushort1& b) - { - return make_ushort1(a.x / b.x); - } - static __device__ float1 operator*(const ushort1& a, float s) - { - return make_float1(a.x * s); - } - - static __device__ ushort2 operator+(const ushort2& a, const ushort2& b) - { - return make_ushort2(a.x + b.x, a.y + b.y); - } - static __device__ ushort2 operator-(const ushort2& a, const ushort2& b) - { - return make_ushort2(a.x - b.x, a.y - b.y); - } - static __device__ ushort2 operator*(const ushort2& a, const ushort2& b) - { - return make_ushort2(a.x * b.x, a.y * b.y); - } - static __device__ ushort2 operator/(const ushort2& a, const ushort2& b) - { - return make_ushort2(a.x / b.x, a.y / b.y); - } - static __device__ float2 operator*(const ushort2& a, float s) - { - return make_float2(a.x * s, a.y * s); - } - - static __device__ ushort3 operator+(const ushort3& a, const ushort3& b) - { - return make_ushort3(a.x + b.x, a.y + b.y, a.z + b.z); - } - static __device__ ushort3 operator-(const ushort3& a, const ushort3& b) - { - return make_ushort3(a.x - b.x, a.y - b.y, a.z - b.z); - } - static __device__ ushort3 operator*(const ushort3& a, const ushort3& b) - { - return make_ushort3(a.x * b.x, a.y * b.y, a.z * b.z); - } - static __device__ ushort3 operator/(const ushort3& a, const ushort3& b) - { - return make_ushort3(a.x / b.x, a.y / b.y, a.z / b.z); - } - static __device__ float3 operator*(const ushort3& a, float s) - { - return make_float3(a.x * s, a.y * s, a.z * s); - } - - static __device__ ushort4 operator+(const ushort4& a, const ushort4& b) - { - return make_ushort4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - static __device__ ushort4 operator-(const ushort4& a, const ushort4& b) - { - return make_ushort4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - } - static __device__ ushort4 operator*(const ushort4& a, const ushort4& b) - { - return make_ushort4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - } - static __device__ ushort4 operator/(const ushort4& a, const ushort4& b) - { - return make_ushort4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - } - static __device__ float4 operator*(const ushort4& a, float s) - { - return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); - } - - static __device__ short1 operator+(const short1& a, const short1& b) - { - return make_short1(a.x + b.x); - } - static __device__ short1 operator-(const short1& a, const short1& b) - { - return make_short1(a.x - b.x); - } - static __device__ short1 operator*(const short1& a, const short1& b) - { - return make_short1(a.x * b.x); - } - static __device__ short1 operator/(const short1& a, const short1& b) - { - return make_short1(a.x / b.x); - } - static __device__ float1 operator*(const short1& a, float s) - { - return make_float1(a.x * s); - } - - static __device__ short2 operator+(const short2& a, const short2& b) - { - return make_short2(a.x + b.x, a.y + b.y); - } - static __device__ short2 operator-(const short2& a, const short2& b) - { - return make_short2(a.x - b.x, a.y - b.y); - } - static __device__ short2 operator*(const short2& a, const short2& b) - { - return make_short2(a.x * b.x, a.y * b.y); - } - static __device__ short2 operator/(const short2& a, const short2& b) - { - return make_short2(a.x / b.x, a.y / b.y); - } - static __device__ float2 operator*(const short2& a, float s) - { - return make_float2(a.x * s, a.y * s); - } - - static __device__ short3 operator+(const short3& a, const short3& b) - { - return make_short3(a.x + b.x, a.y + b.y, a.z + b.z); - } - static __device__ short3 operator-(const short3& a, const short3& b) - { - return make_short3(a.x - b.x, a.y - b.y, a.z - b.z); - } - static __device__ short3 operator*(const short3& a, const short3& b) - { - return make_short3(a.x * b.x, a.y * b.y, a.z * b.z); - } - static __device__ short3 operator/(const short3& a, const short3& b) - { - return make_short3(a.x / b.x, a.y / b.y, a.z / b.z); - } - static __device__ float3 operator*(const short3& a, float s) - { - return make_float3(a.x * s, a.y * s, a.z * s); - } - - static __device__ short4 operator+(const short4& a, const short4& b) - { - return make_short4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - static __device__ short4 operator-(const short4& a, const short4& b) - { - return make_short4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - } - static __device__ short4 operator*(const short4& a, const short4& b) - { - return make_short4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - } - static __device__ short4 operator/(const short4& a, const short4& b) - { - return make_short4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - } - static __device__ float4 operator*(const short4& a, float s) - { - return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); - } - - static __device__ int1 operator+(const int1& a, const int1& b) - { - return make_int1(a.x + b.x); - } - static __device__ int1 operator-(const int1& a, const int1& b) - { - return make_int1(a.x - b.x); - } - static __device__ int1 operator*(const int1& a, const int1& b) - { - return make_int1(a.x * b.x); - } - static __device__ int1 operator/(const int1& a, const int1& b) - { - return make_int1(a.x / b.x); - } - static __device__ float1 operator*(const int1& a, float s) - { - return make_float1(a.x * s); - } - - static __device__ int2 operator+(const int2& a, const int2& b) - { - return make_int2(a.x + b.x, a.y + b.y); - } - static __device__ int2 operator-(const int2& a, const int2& b) - { - return make_int2(a.x - b.x, a.y - b.y); - } - static __device__ int2 operator*(const int2& a, const int2& b) - { - return make_int2(a.x * b.x, a.y * b.y); - } - static __device__ int2 operator/(const int2& a, const int2& b) - { - return make_int2(a.x / b.x, a.y / b.y); - } - static __device__ float2 operator*(const int2& a, float s) - { - return make_float2(a.x * s, a.y * s); - } - - static __device__ int3 operator+(const int3& a, const int3& b) - { - return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); - } - static __device__ int3 operator-(const int3& a, const int3& b) - { - return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); - } - static __device__ int3 operator*(const int3& a, const int3& b) - { - return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); - } - static __device__ int3 operator/(const int3& a, const int3& b) - { - return make_int3(a.x / b.x, a.y / b.y, a.z / b.z); - } - static __device__ float3 operator*(const int3& a, float s) - { - return make_float3(a.x * s, a.y * s, a.z * s); - } - - static __device__ int4 operator+(const int4& a, const int4& b) - { - return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - static __device__ int4 operator-(const int4& a, const int4& b) - { - return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - } - static __device__ int4 operator*(const int4& a, const int4& b) - { - return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - } - static __device__ int4 operator/(const int4& a, const int4& b) - { - return make_int4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - } - static __device__ float4 operator*(const int4& a, float s) - { - return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); - } - - static __device__ float1 operator+(const float1& a, const float1& b) - { - return make_float1(a.x + b.x); - } - static __device__ float1 operator-(const float1& a, const float1& b) - { - return make_float1(a.x - b.x); - } - static __device__ float1 operator*(const float1& a, const float1& b) - { - return make_float1(a.x * b.x); - } - static __device__ float1 operator/(const float1& a, const float1& b) - { - return make_float1(a.x / b.x); - } - static __device__ float1 operator*(const float1& a, float s) - { - return make_float1(a.x * s); - } - - static __device__ float2 operator+(const float2& a, const float2& b) - { - return make_float2(a.x + b.x, a.y + b.y); - } - static __device__ float2 operator-(const float2& a, const float2& b) - { - return make_float2(a.x - b.x, a.y - b.y); - } - static __device__ float2 operator*(const float2& a, const float2& b) - { - return make_float2(a.x * b.x, a.y * b.y); - } - static __device__ float2 operator/(const float2& a, const float2& b) - { - return make_float2(a.x / b.x, a.y / b.y); - } - static __device__ float2 operator*(const float2& a, float s) - { - return make_float2(a.x * s, a.y * s); - } - - static __device__ float3 operator+(const float3& a, const float3& b) - { - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); - } - static __device__ float3 operator-(const float3& a, const float3& b) - { - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); - } - static __device__ float3 operator*(const float3& a, const float3& b) - { - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); - } - static __device__ float3 operator/(const float3& a, const float3& b) - { - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); - } - static __device__ float3 operator*(const float3& a, float s) - { - return make_float3(a.x * s, a.y * s, a.z * s); - } - - static __device__ float4 operator+(const float4& a, const float4& b) - { - return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - static __device__ float4 operator-(const float4& a, const float4& b) - { - return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - } - static __device__ float4 operator*(const float4& a, const float4& b) - { - return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - } - static __device__ float4 operator/(const float4& a, const float4& b) - { - return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - } - static __device__ float4 operator*(const float4& a, float s) - { - return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); - } - } -} - -#endif // __OPENCV_GPU_VECMATH_HPP__ \ No newline at end of file diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 2118c4d..376d1e0 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -41,7 +41,6 @@ //M*/ #include "precomp.hpp" -#include "border_interpolate.hpp" using namespace cv; using namespace cv::gpu; @@ -860,6 +859,9 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4 hist_callers[src.depth()](src, hist, levels); } +//////////////////////////////////////////////////////////////////////// +// cornerHarris & minEgenVal + namespace cv { namespace gpu { namespace imgproc { void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst); @@ -939,6 +941,24 @@ namespace } // Anonymous namespace + +bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType) +{ + if (cpuBorderType == cv::BORDER_REFLECT101) + { + gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU; + return true; + } + + if (cpuBorderType == cv::BORDER_REPLICATE) + { + gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU; + return true; + } + + return false; +} + void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType) { CV_Assert(borderType == cv::BORDER_REFLECT101 || diff --git a/modules/gpu/src/internal_shared.hpp b/modules/gpu/src/internal_shared.hpp deleted file mode 100644 index c3a5882..0000000 --- a/modules/gpu/src/internal_shared.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_GPU_INTERNAL_SHARED_HPP__ -#define __OPENCV_GPU_INTERNAL_SHARED_HPP__ - -namespace cv { namespace gpu { - - // Internal GPU anlagues of CPU border extrapolation types - enum - { - BORDER_REFLECT101 = 0, - BORDER_REPLICATE - }; - -}} - -#endif \ No newline at end of file diff --git a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp new file mode 100644 index 0000000..d53e693 --- /dev/null +++ b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp @@ -0,0 +1,176 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +namespace cv +{ + namespace gpu + { + namespace device + { + struct BrdReflect101 + { + BrdReflect101(int len): last(len - 1) {} + + __device__ int idx_low(int i) const + { + return abs(i); + } + + __device__ int idx_high(int i) const + { + return last - abs(last - i); + } + + __device__ int idx(int i) const + { + return abs(idx_high(i)); + } + + bool is_range_safe(int mini, int maxi) const + { + return -last <= mini && maxi <= 2 * last; + } + + int last; + }; + + + template + struct BrdRowReflect101: BrdReflect101 + { + BrdRowReflect101(int len): BrdReflect101(len) {} + + __device__ float at_low(int i, const T* data) const + { + return data[idx_low(i)]; + } + + __device__ float at_high(int i, const T* data) const + { + return data[idx_high(i)]; + } + }; + + + template + struct BrdColReflect101: BrdReflect101 + { + BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {} + + __device__ float at_low(int i, const T* data) const + { + return data[idx_low(i) * step]; + } + + __device__ float at_high(int i, const T* data) const + { + return data[idx_high(i) * step]; + } + + int step; + }; + + + struct BrdReplicate + { + BrdReplicate(int len): last(len - 1) {} + + __device__ int idx_low(int i) const + { + return max(i, 0); + } + + __device__ int idx_high(int i) const + { + return min(i, last); + } + + __device__ int idx(int i) const + { + return max(min(i, last), 0); + } + + bool is_range_safe(int mini, int maxi) const + { + return true; + } + + int last; + }; + + + template + struct BrdRowReplicate: BrdReplicate + { + BrdRowReplicate(int len): BrdReplicate(len) {} + + __device__ float at_low(int i, const T* data) const + { + return data[idx_low(i)]; + } + + __device__ float at_high(int i, const T* data) const + { + return data[idx_high(i)]; + } + }; + + + template + struct BrdColReplicate: BrdReplicate + { + BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {} + + __device__ float at_low(int i, const T* data) const + { + return data[idx_low(i) * step]; + } + + __device__ float at_high(int i, const T* data) const + { + return data[idx_high(i) * step]; + } + int step; + }; + } + } +} \ No newline at end of file diff --git a/modules/gpu/src/cuda/dynamic_smem.hpp b/modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp similarity index 100% rename from modules/gpu/src/cuda/dynamic_smem.hpp rename to modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp diff --git a/modules/gpu/src/cuda/limits_gpu.hpp b/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp similarity index 97% rename from modules/gpu/src/cuda/limits_gpu.hpp rename to modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp index fcf4bdc..790440c 100644 --- a/modules/gpu/src/cuda/limits_gpu.hpp +++ b/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp @@ -193,7 +193,7 @@ namespace cv typedef float type; __device__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; }; __device__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; }; - __device__ static type epsilon(); + __device__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; }; __device__ static type round_error(); __device__ static type denorm_min(); __device__ static type infinity(); diff --git a/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp b/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp new file mode 100644 index 0000000..d96dfac --- /dev/null +++ b/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp @@ -0,0 +1,172 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_SATURATE_CAST_HPP__ +#define __OPENCV_GPU_SATURATE_CAST_HPP__ + +#include "internal_shared.hpp" + +namespace cv +{ + namespace gpu + { + namespace device + { + template static __device__ _Tp saturate_cast(uchar v) { return _Tp(v); } + template static __device__ _Tp saturate_cast(schar v) { return _Tp(v); } + template static __device__ _Tp saturate_cast(ushort v) { return _Tp(v); } + template static __device__ _Tp saturate_cast(short v) { return _Tp(v); } + template static __device__ _Tp saturate_cast(uint v) { return _Tp(v); } + template static __device__ _Tp saturate_cast(int v) { return _Tp(v); } + template static __device__ _Tp saturate_cast(float v) { return _Tp(v); } + template static __device__ _Tp saturate_cast(double v) { return _Tp(v); } + + template<> static __device__ uchar saturate_cast(schar v) + { return (uchar)max((int)v, 0); } + template<> static __device__ uchar saturate_cast(ushort v) + { return (uchar)min((uint)v, (uint)UCHAR_MAX); } + template<> static __device__ uchar saturate_cast(int v) + { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); } + template<> static __device__ uchar saturate_cast(uint v) + { return (uchar)min(v, (uint)UCHAR_MAX); } + template<> static __device__ uchar saturate_cast(short v) + { return saturate_cast((uint)v); } + + template<> static __device__ uchar saturate_cast(float v) + { int iv = __float2int_rn(v); return saturate_cast(iv); } + template<> static __device__ uchar saturate_cast(double v) + { + #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 + int iv = __double2int_rn(v); return saturate_cast(iv); + #else + return saturate_cast((float)v); + #endif + } + + template<> static __device__ schar saturate_cast(uchar v) + { return (schar)min((int)v, SCHAR_MAX); } + template<> static __device__ schar saturate_cast(ushort v) + { return (schar)min((uint)v, (uint)SCHAR_MAX); } + template<> static __device__ schar saturate_cast(int v) + { + return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ? + v : v > 0 ? SCHAR_MAX : SCHAR_MIN); + } + template<> static __device__ schar saturate_cast(short v) + { return saturate_cast((int)v); } + template<> static __device__ schar saturate_cast(uint v) + { return (schar)min(v, (uint)SCHAR_MAX); } + + template<> static __device__ schar saturate_cast(float v) + { int iv = __float2int_rn(v); return saturate_cast(iv); } + template<> static __device__ schar saturate_cast(double v) + { + #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 + int iv = __double2int_rn(v); return saturate_cast(iv); + #else + return saturate_cast((float)v); + #endif + } + + template<> static __device__ ushort saturate_cast(schar v) + { return (ushort)max((int)v, 0); } + template<> static __device__ ushort saturate_cast(short v) + { return (ushort)max((int)v, 0); } + template<> static __device__ ushort saturate_cast(int v) + { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); } + template<> static __device__ ushort saturate_cast(uint v) + { return (ushort)min(v, (uint)USHRT_MAX); } + template<> static __device__ ushort saturate_cast(float v) + { int iv = __float2int_rn(v); return saturate_cast(iv); } + template<> static __device__ ushort saturate_cast(double v) + { + #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 + int iv = __double2int_rn(v); return saturate_cast(iv); + #else + return saturate_cast((float)v); + #endif + } + + template<> static __device__ short saturate_cast(ushort v) + { return (short)min((int)v, SHRT_MAX); } + template<> static __device__ short saturate_cast(int v) + { + return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? + v : v > 0 ? SHRT_MAX : SHRT_MIN); + } + template<> static __device__ short saturate_cast(uint v) + { return (short)min(v, (uint)SHRT_MAX); } + template<> static __device__ short saturate_cast(float v) + { int iv = __float2int_rn(v); return saturate_cast(iv); } + template<> static __device__ short saturate_cast(double v) + { + #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 + int iv = __double2int_rn(v); return saturate_cast(iv); + #else + return saturate_cast((float)v); + #endif + } + + template<> static __device__ int saturate_cast(float v) { return __float2int_rn(v); } + template<> static __device__ int saturate_cast(double v) + { + #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 + return __double2int_rn(v); + #else + return saturate_cast((float)v); + #endif + } + + template<> static __device__ uint saturate_cast(float v){ return __float2uint_rn(v); } + template<> static __device__ uint saturate_cast(double v) + { + #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 + return __double2uint_rn(v); + #else + return saturate_cast((float)v); + #endif + } + } + } +} + +#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/src/opencv2/gpu/device/vecmath.hpp b/modules/gpu/src/opencv2/gpu/device/vecmath.hpp new file mode 100644 index 0000000..d73853c --- /dev/null +++ b/modules/gpu/src/opencv2/gpu/device/vecmath.hpp @@ -0,0 +1,939 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_VECMATH_HPP__ +#define __OPENCV_GPU_VECMATH_HPP__ + +#include "internal_shared.hpp" +#include "saturate_cast.hpp" + +namespace cv +{ + namespace gpu + { + namespace device + { + template struct TypeVec; + + template<> struct TypeVec { typedef uchar vec_t; }; + template<> struct TypeVec { typedef uchar1 vec_t; }; + template<> struct TypeVec { typedef uchar2 vec_t; }; + template<> struct TypeVec { typedef uchar2 vec_t; }; + template<> struct TypeVec { typedef uchar3 vec_t; }; + template<> struct TypeVec { typedef uchar3 vec_t; }; + template<> struct TypeVec { typedef uchar4 vec_t; }; + template<> struct TypeVec { typedef uchar4 vec_t; }; + + template<> struct TypeVec { typedef char vec_t; }; + template<> struct TypeVec { typedef char1 vec_t; }; + template<> struct TypeVec { typedef char2 vec_t; }; + template<> struct TypeVec { typedef char2 vec_t; }; + template<> struct TypeVec { typedef char3 vec_t; }; + template<> struct TypeVec { typedef char3 vec_t; }; + template<> struct TypeVec { typedef char4 vec_t; }; + template<> struct TypeVec { typedef char4 vec_t; }; + + template<> struct TypeVec { typedef ushort vec_t; }; + template<> struct TypeVec { typedef ushort1 vec_t; }; + template<> struct TypeVec { typedef ushort2 vec_t; }; + template<> struct TypeVec { typedef ushort2 vec_t; }; + template<> struct TypeVec { typedef ushort3 vec_t; }; + template<> struct TypeVec { typedef ushort3 vec_t; }; + template<> struct TypeVec { typedef ushort4 vec_t; }; + template<> struct TypeVec { typedef ushort4 vec_t; }; + + template<> struct TypeVec { typedef short vec_t; }; + template<> struct TypeVec { typedef short1 vec_t; }; + template<> struct TypeVec { typedef short2 vec_t; }; + template<> struct TypeVec { typedef short2 vec_t; }; + template<> struct TypeVec { typedef short3 vec_t; }; + template<> struct TypeVec { typedef short3 vec_t; }; + template<> struct TypeVec { typedef short4 vec_t; }; + template<> struct TypeVec { typedef short4 vec_t; }; + + template<> struct TypeVec { typedef uint vec_t; }; + template<> struct TypeVec { typedef uint1 vec_t; }; + template<> struct TypeVec { typedef uint2 vec_t; }; + template<> struct TypeVec { typedef uint2 vec_t; }; + template<> struct TypeVec { typedef uint3 vec_t; }; + template<> struct TypeVec { typedef uint3 vec_t; }; + template<> struct TypeVec { typedef uint4 vec_t; }; + template<> struct TypeVec { typedef uint4 vec_t; }; + + template<> struct TypeVec { typedef int vec_t; }; + template<> struct TypeVec { typedef int1 vec_t; }; + template<> struct TypeVec { typedef int2 vec_t; }; + template<> struct TypeVec { typedef int2 vec_t; }; + template<> struct TypeVec { typedef int3 vec_t; }; + template<> struct TypeVec { typedef int3 vec_t; }; + template<> struct TypeVec { typedef int4 vec_t; }; + template<> struct TypeVec { typedef int4 vec_t; }; + + template<> struct TypeVec { typedef float vec_t; }; + template<> struct TypeVec { typedef float1 vec_t; }; + template<> struct TypeVec { typedef float2 vec_t; }; + template<> struct TypeVec { typedef float2 vec_t; }; + template<> struct TypeVec { typedef float3 vec_t; }; + template<> struct TypeVec { typedef float3 vec_t; }; + template<> struct TypeVec { typedef float4 vec_t; }; + template<> struct TypeVec { typedef float4 vec_t; }; + + template struct VecTraits; + + template<> struct VecTraits + { + typedef uchar elem_t; + enum {cn=1}; + static __device__ uchar all(uchar v) {return v;} + }; + template<> struct VecTraits + { + typedef uchar elem_t; + enum {cn=1}; + static __device__ uchar1 all(uchar v) {return make_uchar1(v);} + }; + template<> struct VecTraits + { + typedef uchar elem_t; + enum {cn=2}; + static __device__ uchar2 all(uchar v) {return make_uchar2(v, v);} + }; + template<> struct VecTraits + { + typedef uchar elem_t; + enum {cn=3}; + static __device__ uchar3 all(uchar v) {return make_uchar3(v, v, v);} + }; + template<> struct VecTraits + { + typedef uchar elem_t; + enum {cn=4}; + static __device__ uchar4 all(uchar v) {return make_uchar4(v, v, v, v);} + }; + + template<> struct VecTraits + { + typedef char elem_t; + enum {cn=1}; + static __device__ char all(char v) {return v;} + }; + template<> struct VecTraits + { + typedef char elem_t; + enum {cn=1}; + static __device__ char1 all(char v) {return make_char1(v);} + }; + template<> struct VecTraits + { + typedef char elem_t; + enum {cn=2}; + static __device__ char2 all(char v) {return make_char2(v, v);} + }; + template<> struct VecTraits + { + typedef char elem_t; + enum {cn=3}; + static __device__ char3 all(char v) {return make_char3(v, v, v);} + }; + template<> struct VecTraits + { + typedef char elem_t; + enum {cn=4}; + static __device__ char4 all(char v) {return make_char4(v, v, v, v);} + }; + + template<> struct VecTraits + { + typedef ushort elem_t; + enum {cn=1}; + static __device__ ushort all(ushort v) {return v;} + }; + template<> struct VecTraits + { + typedef ushort elem_t; + enum {cn=1}; + static __device__ ushort1 all(ushort v) {return make_ushort1(v);} + }; + template<> struct VecTraits + { + typedef ushort elem_t; + enum {cn=2}; + static __device__ ushort2 all(ushort v) {return make_ushort2(v, v);} + }; + template<> struct VecTraits + { + typedef ushort elem_t; + enum {cn=3}; + static __device__ ushort3 all(ushort v) {return make_ushort3(v, v, v);} + }; + template<> struct VecTraits + { + typedef ushort elem_t; + enum {cn=4}; + static __device__ ushort4 all(ushort v) {return make_ushort4(v, v, v, v);} + }; + + template<> struct VecTraits + { + typedef short elem_t; + enum {cn=1}; + static __device__ short all(short v) {return v;} + }; + template<> struct VecTraits + { + typedef short elem_t; + enum {cn=1}; + static __device__ short1 all(short v) {return make_short1(v);} + }; + template<> struct VecTraits + { + typedef short elem_t; + enum {cn=2}; + static __device__ short2 all(short v) {return make_short2(v, v);} + }; + template<> struct VecTraits + { + typedef short elem_t; + enum {cn=3}; + static __device__ short3 all(short v) {return make_short3(v, v, v);} + }; + template<> struct VecTraits + { + typedef short elem_t; + enum {cn=4}; + static __device__ short4 all(short v) {return make_short4(v, v, v, v);} + }; + + template<> struct VecTraits + { + typedef uint elem_t; + enum {cn=1}; + static __device__ uint all(uint v) {return v;} + }; + template<> struct VecTraits + { + typedef uint elem_t; + enum {cn=1}; + static __device__ uint1 all(uint v) {return make_uint1(v);} + }; + template<> struct VecTraits + { + typedef uint elem_t; + enum {cn=2}; + static __device__ uint2 all(uint v) {return make_uint2(v, v);} + }; + template<> struct VecTraits + { + typedef uint elem_t; + enum {cn=3}; + static __device__ uint3 all(uint v) {return make_uint3(v, v, v);} + }; + template<> struct VecTraits + { + typedef uint elem_t; + enum {cn=4}; + static __device__ uint4 all(uint v) {return make_uint4(v, v, v, v);} + }; + + template<> struct VecTraits + { + typedef int elem_t; + enum {cn=1}; + static __device__ int all(int v) {return v;} + }; + template<> struct VecTraits + { + typedef int elem_t; + enum {cn=1}; + static __device__ int1 all(int v) {return make_int1(v);} + }; + template<> struct VecTraits + { + typedef int elem_t; + enum {cn=2}; + static __device__ int2 all(int v) {return make_int2(v, v);} + }; + template<> struct VecTraits + { + typedef int elem_t; + enum {cn=3}; + static __device__ int3 all(int v) {return make_int3(v, v, v);} + }; + template<> struct VecTraits + { + typedef int elem_t; + enum {cn=4}; + static __device__ int4 all(int v) {return make_int4(v, v, v, v);} + }; + + template<> struct VecTraits + { + typedef float elem_t; + enum {cn=1}; + static __device__ float all(float v) {return v;} + }; + template<> struct VecTraits + { + typedef float elem_t; + enum {cn=1}; + static __device__ float1 all(float v) {return make_float1(v);} + }; + template<> struct VecTraits + { + typedef float elem_t; + enum {cn=2}; + static __device__ float2 all(float v) {return make_float2(v, v);} + }; + template<> struct VecTraits + { + typedef float elem_t; + enum {cn=3}; + static __device__ float3 all(float v) {return make_float3(v, v, v);} + }; + template<> struct VecTraits + { + typedef float elem_t; + enum {cn=4}; + static __device__ float4 all(float v) {return make_float4(v, v, v, v);} + }; + + template struct SatCast; + template struct SatCast<1, VecD> + { + template + __device__ VecD operator()(const VecS& v) + { + VecD res; + res.x = saturate_cast< VecTraits::elem_t >(v.x); + return res; + } + }; + template struct SatCast<2, VecD> + { + template + __device__ VecD operator()(const VecS& v) + { + VecD res; + res.x = saturate_cast< VecTraits::elem_t >(v.x); + res.y = saturate_cast< VecTraits::elem_t >(v.y); + return res; + } + }; + template struct SatCast<3, VecD> + { + template + __device__ VecD operator()(const VecS& v) + { + VecD res; + res.x = saturate_cast< VecTraits::elem_t >(v.x); + res.y = saturate_cast< VecTraits::elem_t >(v.y); + res.y = saturate_cast< VecTraits::elem_t >(v.z); + return res; + } + }; + template struct SatCast<4, VecD> + { + template + __device__ VecD operator()(const VecS& v) + { + VecD res; + res.x = saturate_cast< VecTraits::elem_t >(v.x); + res.y = saturate_cast< VecTraits::elem_t >(v.y); + res.y = saturate_cast< VecTraits::elem_t >(v.z); + res.w = saturate_cast< VecTraits::elem_t >(v.w); + return res; + } + }; + + template static __device__ VecD saturate_cast_caller(const VecS& v) + { + SatCast< + + VecTraits::cn, + + VecD + > + + cast; + return cast(v); + } + + template static __device__ _Tp saturate_cast(const uchar1& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const char1& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const ushort1& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const short1& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const uint1& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const int1& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const float1& v) {return saturate_cast_caller<_Tp>(v);} + + template static __device__ _Tp saturate_cast(const uchar2& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const char2& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const ushort2& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const short2& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const uint2& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const int2& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const float2& v) {return saturate_cast_caller<_Tp>(v);} + + template static __device__ _Tp saturate_cast(const uchar3& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const char3& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const ushort3& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const short3& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const uint3& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const int3& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const float3& v) {return saturate_cast_caller<_Tp>(v);} + + template static __device__ _Tp saturate_cast(const uchar4& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const char4& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const ushort4& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const short4& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const uint4& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const int4& v) {return saturate_cast_caller<_Tp>(v);} + template static __device__ _Tp saturate_cast(const float4& v) {return saturate_cast_caller<_Tp>(v);} + + static __device__ uchar1 operator+(const uchar1& a, const uchar1& b) + { + return make_uchar1(a.x + b.x); + } + static __device__ uchar1 operator-(const uchar1& a, const uchar1& b) + { + return make_uchar1(a.x - b.x); + } + static __device__ uchar1 operator*(const uchar1& a, const uchar1& b) + { + return make_uchar1(a.x * b.x); + } + static __device__ uchar1 operator/(const uchar1& a, const uchar1& b) + { + return make_uchar1(a.x / b.x); + } + static __device__ float1 operator*(const uchar1& a, float s) + { + return make_float1(a.x * s); + } + + static __device__ uchar2 operator+(const uchar2& a, const uchar2& b) + { + return make_uchar2(a.x + b.x, a.y + b.y); + } + static __device__ uchar2 operator-(const uchar2& a, const uchar2& b) + { + return make_uchar2(a.x - b.x, a.y - b.y); + } + static __device__ uchar2 operator*(const uchar2& a, const uchar2& b) + { + return make_uchar2(a.x * b.x, a.y * b.y); + } + static __device__ uchar2 operator/(const uchar2& a, const uchar2& b) + { + return make_uchar2(a.x / b.x, a.y / b.y); + } + static __device__ float2 operator*(const uchar2& a, float s) + { + return make_float2(a.x * s, a.y * s); + } + + static __device__ uchar3 operator+(const uchar3& a, const uchar3& b) + { + return make_uchar3(a.x + b.x, a.y + b.y, a.z + b.z); + } + static __device__ uchar3 operator-(const uchar3& a, const uchar3& b) + { + return make_uchar3(a.x - b.x, a.y - b.y, a.z - b.z); + } + static __device__ uchar3 operator*(const uchar3& a, const uchar3& b) + { + return make_uchar3(a.x * b.x, a.y * b.y, a.z * b.z); + } + static __device__ uchar3 operator/(const uchar3& a, const uchar3& b) + { + return make_uchar3(a.x / b.x, a.y / b.y, a.z / b.z); + } + static __device__ float3 operator*(const uchar3& a, float s) + { + return make_float3(a.x * s, a.y * s, a.z * s); + } + + static __device__ uchar4 operator+(const uchar4& a, const uchar4& b) + { + return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + static __device__ uchar4 operator-(const uchar4& a, const uchar4& b) + { + return make_uchar4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + } + static __device__ uchar4 operator*(const uchar4& a, const uchar4& b) + { + return make_uchar4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + } + static __device__ uchar4 operator/(const uchar4& a, const uchar4& b) + { + return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + } + static __device__ float4 operator*(const uchar4& a, float s) + { + return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); + } + + static __device__ char1 operator+(const char1& a, const char1& b) + { + return make_char1(a.x + b.x); + } + static __device__ char1 operator-(const char1& a, const char1& b) + { + return make_char1(a.x - b.x); + } + static __device__ char1 operator*(const char1& a, const char1& b) + { + return make_char1(a.x * b.x); + } + static __device__ char1 operator/(const char1& a, const char1& b) + { + return make_char1(a.x / b.x); + } + static __device__ float1 operator*(const char1& a, float s) + { + return make_float1(a.x * s); + } + + static __device__ char2 operator+(const char2& a, const char2& b) + { + return make_char2(a.x + b.x, a.y + b.y); + } + static __device__ char2 operator-(const char2& a, const char2& b) + { + return make_char2(a.x - b.x, a.y - b.y); + } + static __device__ char2 operator*(const char2& a, const char2& b) + { + return make_char2(a.x * b.x, a.y * b.y); + } + static __device__ char2 operator/(const char2& a, const char2& b) + { + return make_char2(a.x / b.x, a.y / b.y); + } + static __device__ float2 operator*(const char2& a, float s) + { + return make_float2(a.x * s, a.y * s); + } + + static __device__ char3 operator+(const char3& a, const char3& b) + { + return make_char3(a.x + b.x, a.y + b.y, a.z + b.z); + } + static __device__ char3 operator-(const char3& a, const char3& b) + { + return make_char3(a.x - b.x, a.y - b.y, a.z - b.z); + } + static __device__ char3 operator*(const char3& a, const char3& b) + { + return make_char3(a.x * b.x, a.y * b.y, a.z * b.z); + } + static __device__ char3 operator/(const char3& a, const char3& b) + { + return make_char3(a.x / b.x, a.y / b.y, a.z / b.z); + } + static __device__ float3 operator*(const char3& a, float s) + { + return make_float3(a.x * s, a.y * s, a.z * s); + } + + static __device__ char4 operator+(const char4& a, const char4& b) + { + return make_char4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + static __device__ char4 operator-(const char4& a, const char4& b) + { + return make_char4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + } + static __device__ char4 operator*(const char4& a, const char4& b) + { + return make_char4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + } + static __device__ char4 operator/(const char4& a, const char4& b) + { + return make_char4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + } + static __device__ float4 operator*(const char4& a, float s) + { + return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); + } + + static __device__ ushort1 operator+(const ushort1& a, const ushort1& b) + { + return make_ushort1(a.x + b.x); + } + static __device__ ushort1 operator-(const ushort1& a, const ushort1& b) + { + return make_ushort1(a.x - b.x); + } + static __device__ ushort1 operator*(const ushort1& a, const ushort1& b) + { + return make_ushort1(a.x * b.x); + } + static __device__ ushort1 operator/(const ushort1& a, const ushort1& b) + { + return make_ushort1(a.x / b.x); + } + static __device__ float1 operator*(const ushort1& a, float s) + { + return make_float1(a.x * s); + } + + static __device__ ushort2 operator+(const ushort2& a, const ushort2& b) + { + return make_ushort2(a.x + b.x, a.y + b.y); + } + static __device__ ushort2 operator-(const ushort2& a, const ushort2& b) + { + return make_ushort2(a.x - b.x, a.y - b.y); + } + static __device__ ushort2 operator*(const ushort2& a, const ushort2& b) + { + return make_ushort2(a.x * b.x, a.y * b.y); + } + static __device__ ushort2 operator/(const ushort2& a, const ushort2& b) + { + return make_ushort2(a.x / b.x, a.y / b.y); + } + static __device__ float2 operator*(const ushort2& a, float s) + { + return make_float2(a.x * s, a.y * s); + } + + static __device__ ushort3 operator+(const ushort3& a, const ushort3& b) + { + return make_ushort3(a.x + b.x, a.y + b.y, a.z + b.z); + } + static __device__ ushort3 operator-(const ushort3& a, const ushort3& b) + { + return make_ushort3(a.x - b.x, a.y - b.y, a.z - b.z); + } + static __device__ ushort3 operator*(const ushort3& a, const ushort3& b) + { + return make_ushort3(a.x * b.x, a.y * b.y, a.z * b.z); + } + static __device__ ushort3 operator/(const ushort3& a, const ushort3& b) + { + return make_ushort3(a.x / b.x, a.y / b.y, a.z / b.z); + } + static __device__ float3 operator*(const ushort3& a, float s) + { + return make_float3(a.x * s, a.y * s, a.z * s); + } + + static __device__ ushort4 operator+(const ushort4& a, const ushort4& b) + { + return make_ushort4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + static __device__ ushort4 operator-(const ushort4& a, const ushort4& b) + { + return make_ushort4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + } + static __device__ ushort4 operator*(const ushort4& a, const ushort4& b) + { + return make_ushort4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + } + static __device__ ushort4 operator/(const ushort4& a, const ushort4& b) + { + return make_ushort4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + } + static __device__ float4 operator*(const ushort4& a, float s) + { + return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); + } + + static __device__ short1 operator+(const short1& a, const short1& b) + { + return make_short1(a.x + b.x); + } + static __device__ short1 operator-(const short1& a, const short1& b) + { + return make_short1(a.x - b.x); + } + static __device__ short1 operator*(const short1& a, const short1& b) + { + return make_short1(a.x * b.x); + } + static __device__ short1 operator/(const short1& a, const short1& b) + { + return make_short1(a.x / b.x); + } + static __device__ float1 operator*(const short1& a, float s) + { + return make_float1(a.x * s); + } + + static __device__ short2 operator+(const short2& a, const short2& b) + { + return make_short2(a.x + b.x, a.y + b.y); + } + static __device__ short2 operator-(const short2& a, const short2& b) + { + return make_short2(a.x - b.x, a.y - b.y); + } + static __device__ short2 operator*(const short2& a, const short2& b) + { + return make_short2(a.x * b.x, a.y * b.y); + } + static __device__ short2 operator/(const short2& a, const short2& b) + { + return make_short2(a.x / b.x, a.y / b.y); + } + static __device__ float2 operator*(const short2& a, float s) + { + return make_float2(a.x * s, a.y * s); + } + + static __device__ short3 operator+(const short3& a, const short3& b) + { + return make_short3(a.x + b.x, a.y + b.y, a.z + b.z); + } + static __device__ short3 operator-(const short3& a, const short3& b) + { + return make_short3(a.x - b.x, a.y - b.y, a.z - b.z); + } + static __device__ short3 operator*(const short3& a, const short3& b) + { + return make_short3(a.x * b.x, a.y * b.y, a.z * b.z); + } + static __device__ short3 operator/(const short3& a, const short3& b) + { + return make_short3(a.x / b.x, a.y / b.y, a.z / b.z); + } + static __device__ float3 operator*(const short3& a, float s) + { + return make_float3(a.x * s, a.y * s, a.z * s); + } + + static __device__ short4 operator+(const short4& a, const short4& b) + { + return make_short4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + static __device__ short4 operator-(const short4& a, const short4& b) + { + return make_short4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + } + static __device__ short4 operator*(const short4& a, const short4& b) + { + return make_short4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + } + static __device__ short4 operator/(const short4& a, const short4& b) + { + return make_short4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + } + static __device__ float4 operator*(const short4& a, float s) + { + return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); + } + + static __device__ int1 operator+(const int1& a, const int1& b) + { + return make_int1(a.x + b.x); + } + static __device__ int1 operator-(const int1& a, const int1& b) + { + return make_int1(a.x - b.x); + } + static __device__ int1 operator*(const int1& a, const int1& b) + { + return make_int1(a.x * b.x); + } + static __device__ int1 operator/(const int1& a, const int1& b) + { + return make_int1(a.x / b.x); + } + static __device__ float1 operator*(const int1& a, float s) + { + return make_float1(a.x * s); + } + + static __device__ int2 operator+(const int2& a, const int2& b) + { + return make_int2(a.x + b.x, a.y + b.y); + } + static __device__ int2 operator-(const int2& a, const int2& b) + { + return make_int2(a.x - b.x, a.y - b.y); + } + static __device__ int2 operator*(const int2& a, const int2& b) + { + return make_int2(a.x * b.x, a.y * b.y); + } + static __device__ int2 operator/(const int2& a, const int2& b) + { + return make_int2(a.x / b.x, a.y / b.y); + } + static __device__ float2 operator*(const int2& a, float s) + { + return make_float2(a.x * s, a.y * s); + } + + static __device__ int3 operator+(const int3& a, const int3& b) + { + return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); + } + static __device__ int3 operator-(const int3& a, const int3& b) + { + return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); + } + static __device__ int3 operator*(const int3& a, const int3& b) + { + return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); + } + static __device__ int3 operator/(const int3& a, const int3& b) + { + return make_int3(a.x / b.x, a.y / b.y, a.z / b.z); + } + static __device__ float3 operator*(const int3& a, float s) + { + return make_float3(a.x * s, a.y * s, a.z * s); + } + + static __device__ int4 operator+(const int4& a, const int4& b) + { + return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + static __device__ int4 operator-(const int4& a, const int4& b) + { + return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + } + static __device__ int4 operator*(const int4& a, const int4& b) + { + return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + } + static __device__ int4 operator/(const int4& a, const int4& b) + { + return make_int4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + } + static __device__ float4 operator*(const int4& a, float s) + { + return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); + } + + static __device__ float1 operator+(const float1& a, const float1& b) + { + return make_float1(a.x + b.x); + } + static __device__ float1 operator-(const float1& a, const float1& b) + { + return make_float1(a.x - b.x); + } + static __device__ float1 operator*(const float1& a, const float1& b) + { + return make_float1(a.x * b.x); + } + static __device__ float1 operator/(const float1& a, const float1& b) + { + return make_float1(a.x / b.x); + } + static __device__ float1 operator*(const float1& a, float s) + { + return make_float1(a.x * s); + } + + static __device__ float2 operator+(const float2& a, const float2& b) + { + return make_float2(a.x + b.x, a.y + b.y); + } + static __device__ float2 operator-(const float2& a, const float2& b) + { + return make_float2(a.x - b.x, a.y - b.y); + } + static __device__ float2 operator*(const float2& a, const float2& b) + { + return make_float2(a.x * b.x, a.y * b.y); + } + static __device__ float2 operator/(const float2& a, const float2& b) + { + return make_float2(a.x / b.x, a.y / b.y); + } + static __device__ float2 operator*(const float2& a, float s) + { + return make_float2(a.x * s, a.y * s); + } + + static __device__ float3 operator+(const float3& a, const float3& b) + { + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); + } + static __device__ float3 operator-(const float3& a, const float3& b) + { + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); + } + static __device__ float3 operator*(const float3& a, const float3& b) + { + return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); + } + static __device__ float3 operator/(const float3& a, const float3& b) + { + return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); + } + static __device__ float3 operator*(const float3& a, float s) + { + return make_float3(a.x * s, a.y * s, a.z * s); + } + + static __device__ float4 operator+(const float4& a, const float4& b) + { + return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + static __device__ float4 operator-(const float4& a, const float4& b) + { + return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + } + static __device__ float4 operator*(const float4& a, const float4& b) + { + return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + } + static __device__ float4 operator/(const float4& a, const float4& b) + { + return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + } + static __device__ float4 operator*(const float4& a, float s) + { + return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); + } + } + } +} + +#endif // __OPENCV_GPU_VECMATH_HPP__ \ No newline at end of file diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index 0820c61..ab6c42d 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -62,7 +62,7 @@ #if defined(HAVE_CUDA) - #include "cuda_shared.hpp" + #include "internal_shared.hpp" #include "cuda_runtime_api.h" #include "opencv2/gpu/stream_accessor.hpp" #include "npp.h" diff --git a/modules/gpu/src/stereobm_gpu.cpp b/modules/gpu/src/stereobm.cpp similarity index 100% rename from modules/gpu/src/stereobm_gpu.cpp rename to modules/gpu/src/stereobm.cpp -- 2.7.4