From 2dab93c2e8189f6b9274810fa593f682dfde5b87 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 16 Apr 2013 17:44:19 +0400 Subject: [PATCH] refactored gpu info structures (TargetArchs and DeviceInfo) now DeviceInfo provides full information about device (from cudaDeviceProp) --- modules/core/include/opencv2/core/gpu.hpp | 215 +++++-- modules/core/include/opencv2/core/gpu.inl.hpp | 56 ++ modules/core/src/{gpu.cpp => gpu_info.cpp} | 810 +++++++++++++++++++++----- modules/gpufilters/src/filtering.cpp | 4 +- modules/gpuoptflow/test/test_optflow.cpp | 2 +- modules/gpustereo/src/stereobm.cpp | 2 +- modules/ts/src/gpu_perf.cpp | 4 +- modules/ts/src/ts_perf.cpp | 4 +- samples/gpu/driver_api_multi.cpp | 4 +- samples/gpu/driver_api_stereo_multi.cpp | 4 +- samples/gpu/multi.cpp | 4 +- samples/gpu/performance/performance.cpp | 2 +- samples/gpu/stereo_multi.cpp | 4 +- 13 files changed, 929 insertions(+), 186 deletions(-) rename modules/core/src/{gpu.cpp => gpu_info.cpp} (63%) diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp index 46d0710..88e3530 100644 --- a/modules/core/include/opencv2/core/gpu.hpp +++ b/modules/core/include/opencv2/core/gpu.hpp @@ -392,17 +392,17 @@ private: //////////////////////////////// Initialization & Info //////////////////////// -//! This is the only function that do not throw exceptions if the library is compiled without Cuda. +//! this is the only function that do not throw exceptions if the library is compiled without CUDA CV_EXPORTS int getCudaEnabledDeviceCount(); -//! Functions below throw cv::Expception if the library is compiled without Cuda. - +//! set device to be used for GPU executions for the calling host thread CV_EXPORTS void setDevice(int device); +//! returns which device is currently being used for the calling host thread CV_EXPORTS int getDevice(); -//! Explicitly destroys and cleans up all resources associated with the current device in the current process. -//! Any subsequent API call to this device will reinitialize the device. +//! explicitly destroys and cleans up all resources associated with the current device in the current process +//! any subsequent API call to this device will reinitialize the device CV_EXPORTS void resetDevice(); enum FeatureSet @@ -423,75 +423,218 @@ enum FeatureSet DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35 }; -// Checks whether current device supports the given feature +//! checks whether current device supports the given feature CV_EXPORTS bool deviceSupports(FeatureSet feature_set); -// Gives information about what GPU archs this OpenCV GPU module was -// compiled for +//! information about what GPU archs this OpenCV GPU module was compiled for class CV_EXPORTS TargetArchs { public: static bool builtWith(FeatureSet feature_set); + static bool has(int major, int minor); static bool hasPtx(int major, int minor); static bool hasBin(int major, int minor); + static bool hasEqualOrLessPtx(int major, int minor); static bool hasEqualOrGreater(int major, int minor); static bool hasEqualOrGreaterPtx(int major, int minor); static bool hasEqualOrGreaterBin(int major, int minor); -private: - TargetArchs(); }; -// Gives information about the given GPU +//! information about the given GPU. class CV_EXPORTS DeviceInfo { public: - // Creates DeviceInfo object for the current GPU - DeviceInfo() : device_id_(getDevice()) { query(); } + //! 
creates DeviceInfo object for the current GPU + DeviceInfo(); - // Creates DeviceInfo object for the given GPU - DeviceInfo(int device_id) : device_id_(device_id) { query(); } + //! creates DeviceInfo object for the given GPU + DeviceInfo(int device_id); - String name() const { return name_; } + //! device number. + int deviceID() const; - // Return compute capability versions - int majorVersion() const { return majorVersion_; } - int minorVersion() const { return minorVersion_; } + //! ASCII string identifying device + const char* name() const; - int multiProcessorCount() const { return multi_processor_count_; } + //! global memory available on device in bytes + size_t totalGlobalMem() const; + //! shared memory available per block in bytes size_t sharedMemPerBlock() const; + //! 32-bit registers available per block + int regsPerBlock() const; + + //! warp size in threads + int warpSize() const; + + //! maximum pitch in bytes allowed by memory copies + size_t memPitch() const; + + //! maximum number of threads per block + int maxThreadsPerBlock() const; + + //! maximum size of each dimension of a block + Vec3i maxThreadsDim() const; + + //! maximum size of each dimension of a grid + Vec3i maxGridSize() const; + + //! clock frequency in kilohertz + int clockRate() const; + + //! constant memory available on device in bytes + size_t totalConstMem() const; + + //! major compute capability + int major() const; + + //! minor compute capability + int minor() const; + + //! alignment requirement for textures + size_t textureAlignment() const; + + //! pitch alignment requirement for texture references bound to pitched memory + size_t texturePitchAlignment() const; + + //! number of multiprocessors on device + int multiProcessorCount() const; + + //! specified whether there is a run time limit on kernels + bool kernelExecTimeoutEnabled() const; + + //! device is integrated as opposed to discrete + bool integrated() const; + + //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer + bool canMapHostMemory() const; + + enum ComputeMode + { + ComputeModeDefault, /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */ + ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */ + ComputeModeProhibited, /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */ + ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */ + }; + + //! compute mode + ComputeMode computeMode() const; + + //! maximum 1D texture size + int maxTexture1D() const; + + //! maximum 1D mipmapped texture size + int maxTexture1DMipmap() const; + + //! maximum size for 1D textures bound to linear memory + int maxTexture1DLinear() const; + + //! maximum 2D texture dimensions + Vec2i maxTexture2D() const; + + //! maximum 2D mipmapped texture dimensions + Vec2i maxTexture2DMipmap() const; + + //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory + Vec3i maxTexture2DLinear() const; + + //! maximum 2D texture dimensions if texture gather operations have to be performed + Vec2i maxTexture2DGather() const; + + //! maximum 3D texture dimensions + Vec3i maxTexture3D() const; + + //! maximum Cubemap texture dimensions + int maxTextureCubemap() const; + + //! maximum 1D layered texture dimensions + Vec2i maxTexture1DLayered() const; + + //! 
maximum 2D layered texture dimensions + Vec3i maxTexture2DLayered() const; + + //! maximum Cubemap layered texture dimensions + Vec2i maxTextureCubemapLayered() const; + + //! maximum 1D surface size + int maxSurface1D() const; + + //! maximum 2D surface dimensions + Vec2i maxSurface2D() const; + + //! maximum 3D surface dimensions + Vec3i maxSurface3D() const; + + //! maximum 1D layered surface dimensions + Vec2i maxSurface1DLayered() const; + + //! maximum 2D layered surface dimensions + Vec3i maxSurface2DLayered() const; + + //! maximum Cubemap surface dimensions + int maxSurfaceCubemap() const; + + //! maximum Cubemap layered surface dimensions + Vec2i maxSurfaceCubemapLayered() const; + + //! alignment requirements for surfaces + size_t surfaceAlignment() const; + + //! device can possibly execute multiple kernels concurrently + bool concurrentKernels() const; + + //! device has ECC support enabled + bool ECCEnabled() const; + + //! PCI bus ID of the device + int pciBusID() const; + + //! PCI device ID of the device + int pciDeviceID() const; + + //! PCI domain ID of the device + int pciDomainID() const; + + //! true if device is a Tesla device using TCC driver, false otherwise + bool tccDriver() const; + + //! number of asynchronous engines + int asyncEngineCount() const; + + //! device shares a unified address space with the host + bool unifiedAddressing() const; + + //! peak memory clock frequency in kilohertz + int memoryClockRate() const; + + //! global memory bus width in bits + int memoryBusWidth() const; + + //! size of L2 cache in bytes + int l2CacheSize() const; + + //! maximum resident threads per multiprocessor + int maxThreadsPerMultiProcessor() const; + + //! gets free and total device memory void queryMemory(size_t& totalMemory, size_t& freeMemory) const; size_t freeMemory() const; size_t totalMemory() const; - // Checks whether device supports the given feature + //! checks whether device supports the given feature bool supports(FeatureSet feature_set) const; - // Checks whether the GPU module can be run on the given device + //! 
checks whether the GPU module can be run on the given device bool isCompatible() const; - bool canMapHostMemory() const; - - size_t textureAlignment() const; - - int deviceID() const { return device_id_; } - private: - void query(); - int device_id_; - - String name_; - int multi_processor_count_; - int majorVersion_; - int minorVersion_; }; CV_EXPORTS void printCudaDeviceInfo(int device); - CV_EXPORTS void printShortCudaDeviceInfo(int device); }} // namespace cv { namespace gpu { diff --git a/modules/core/include/opencv2/core/gpu.inl.hpp b/modules/core/include/opencv2/core/gpu.inl.hpp index 1983cbc..a30f1b6 100644 --- a/modules/core/include/opencv2/core/gpu.inl.hpp +++ b/modules/core/include/opencv2/core/gpu.inl.hpp @@ -567,6 +567,62 @@ Stream::Stream(const Ptr& impl) { } +//////////////////////////////// Initialization & Info //////////////////////// + +inline +bool TargetArchs::has(int major, int minor) +{ + return hasPtx(major, minor) || hasBin(major, minor); +} + +inline +bool TargetArchs::hasEqualOrGreater(int major, int minor) +{ + return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor); +} + +inline +DeviceInfo::DeviceInfo() +{ + device_id_ = getDevice(); +} + +inline +DeviceInfo::DeviceInfo(int device_id) +{ + CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() ); + device_id_ = device_id; +} + +inline +int DeviceInfo::deviceID() const +{ + return device_id_; +} + +inline +size_t DeviceInfo::freeMemory() const +{ + size_t _totalMemory, _freeMemory; + queryMemory(_totalMemory, _freeMemory); + return _freeMemory; +} + +inline +size_t DeviceInfo::totalMemory() const +{ + size_t _totalMemory, _freeMemory; + queryMemory(_totalMemory, _freeMemory); + return _totalMemory; +} + +inline +bool DeviceInfo::supports(FeatureSet feature_set) const +{ + int version = major() * 10 + minor(); + return version >= feature_set; +} + }} // namespace cv { namespace gpu { //////////////////////////////// Mat //////////////////////////////// diff --git a/modules/core/src/gpu.cpp b/modules/core/src/gpu_info.cpp similarity index 63% rename from modules/core/src/gpu.cpp rename to modules/core/src/gpu_info.cpp index 2216ec7..7520380 100644 --- a/modules/core/src/gpu.cpp +++ b/modules/core/src/gpu_info.cpp @@ -41,50 +41,17 @@ //M*/ #include "precomp.hpp" -#include using namespace cv; using namespace cv::gpu; -//////////////////////////////// Initialization & Info //////////////////////// - -#ifndef HAVE_CUDA - -int cv::gpu::getCudaEnabledDeviceCount() { return 0; } - -void cv::gpu::setDevice(int) { throw_no_cuda(); } -int cv::gpu::getDevice() { throw_no_cuda(); return 0; } - -void cv::gpu::resetDevice() { throw_no_cuda(); } - -bool cv::gpu::deviceSupports(FeatureSet) { throw_no_cuda(); return false; } - -bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_no_cuda(); return false; } -bool cv::gpu::TargetArchs::has(int, int) { throw_no_cuda(); return false; } -bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_no_cuda(); return false; } -bool cv::gpu::TargetArchs::hasBin(int, int) { throw_no_cuda(); return false; } -bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_no_cuda(); return false; } -bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_no_cuda(); return false; } -bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_no_cuda(); return false; } -bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_no_cuda(); return false; } - -size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_no_cuda(); return 0; } 
-void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_no_cuda(); } -size_t cv::gpu::DeviceInfo::freeMemory() const { throw_no_cuda(); return 0; } -size_t cv::gpu::DeviceInfo::totalMemory() const { throw_no_cuda(); return 0; } -bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_no_cuda(); return false; } -bool cv::gpu::DeviceInfo::isCompatible() const { throw_no_cuda(); return false; } -void cv::gpu::DeviceInfo::query() { throw_no_cuda(); } - -void cv::gpu::printCudaDeviceInfo(int) { throw_no_cuda(); } -void cv::gpu::printShortCudaDeviceInfo(int) { throw_no_cuda(); } - -#else // HAVE_CUDA - int cv::gpu::getCudaEnabledDeviceCount() { +#ifndef HAVE_CUDA + return 0; +#else int count; - cudaError_t error = cudaGetDeviceCount( &count ); + cudaError_t error = cudaGetDeviceCount(&count); if (error == cudaErrorInsufficientDriver) return -1; @@ -94,25 +61,78 @@ int cv::gpu::getCudaEnabledDeviceCount() cudaSafeCall( error ); return count; +#endif } void cv::gpu::setDevice(int device) { - cudaSafeCall( cudaSetDevice( device ) ); +#ifndef HAVE_CUDA + (void) device; + throw_no_cuda(); +#else + cudaSafeCall( cudaSetDevice(device) ); +#endif } int cv::gpu::getDevice() { +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else int device; - cudaSafeCall( cudaGetDevice( &device ) ); + cudaSafeCall( cudaGetDevice(&device) ); return device; +#endif } void cv::gpu::resetDevice() { +#ifndef HAVE_CUDA + throw_no_cuda(); +#else cudaSafeCall( cudaDeviceReset() ); +#endif } +bool cv::gpu::deviceSupports(FeatureSet feature_set) +{ +#ifndef HAVE_CUDA + (void) feature_set; + throw_no_cuda(); + return false; +#else + static int versions[] = + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + static const int cache_size = static_cast(sizeof(versions) / sizeof(versions[0])); + + const int devId = getDevice(); + + int version; + + if (devId < cache_size && versions[devId] >= 0) + { + version = versions[devId]; + } + else + { + DeviceInfo dev(devId); + version = dev.major() * 10 + dev.minor(); + if (devId < cache_size) + versions[devId] = version; + } + + return TargetArchs::builtWith(feature_set) && (version >= feature_set); +#endif +} + +//////////////////////////////////////////////////////////////////////// +// TargetArchs + +#ifdef HAVE_CUDA + namespace { class CudaArch @@ -128,7 +148,7 @@ namespace bool hasEqualOrGreaterBin(int major, int minor) const; private: - static void fromStr(const String& set_as_str, std::vector& arr); + static void fromStr(const char* set_as_str, std::vector& arr); std::vector bin; std::vector ptx; @@ -174,12 +194,14 @@ namespace return !bin.empty() && (bin.back() >= major * 10 + minor); } - void CudaArch::fromStr(const String& set_as_str, std::vector& arr) + void CudaArch::fromStr(const char* set_as_str, std::vector& arr) { arr.clear(); + const size_t len = strlen(set_as_str); + size_t pos = 0; - while (pos < set_as_str.size()) + while (pos < len) { if (isspace(set_as_str[pos])) { @@ -189,8 +211,8 @@ namespace { int cur_value; int chars_read; - int args_read = sscanf(set_as_str.c_str() + pos, "%d%n", &cur_value, &chars_read); - CV_Assert(args_read == 1); + int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read); + CV_Assert( args_read == 1 ); arr.push_back(cur_value); pos += chars_read; @@ -201,70 +223,83 @@ namespace } } +#endif + bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set) { +#ifndef HAVE_CUDA + (void) feature_set; + throw_no_cuda(); + return false; +#else return cudaArch.builtWith(feature_set); -} - -bool 
cv::gpu::TargetArchs::has(int major, int minor) -{ - return hasPtx(major, minor) || hasBin(major, minor); +#endif } bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { +#ifndef HAVE_CUDA + (void) major; + (void) minor; + throw_no_cuda(); + return false; +#else return cudaArch.hasPtx(major, minor); +#endif } bool cv::gpu::TargetArchs::hasBin(int major, int minor) { +#ifndef HAVE_CUDA + (void) major; + (void) minor; + throw_no_cuda(); + return false; +#else return cudaArch.hasBin(major, minor); +#endif } bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { +#ifndef HAVE_CUDA + (void) major; + (void) minor; + throw_no_cuda(); + return false; +#else return cudaArch.hasEqualOrLessPtx(major, minor); -} - -bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) -{ - return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor); +#endif } bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { +#ifndef HAVE_CUDA + (void) major; + (void) minor; + throw_no_cuda(); + return false; +#else return cudaArch.hasEqualOrGreaterPtx(major, minor); +#endif } bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { +#ifndef HAVE_CUDA + (void) major; + (void) minor; + throw_no_cuda(); + return false; +#else return cudaArch.hasEqualOrGreaterBin(major, minor); +#endif } -bool cv::gpu::deviceSupports(FeatureSet feature_set) -{ - static int versions[] = - { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - }; - static const int cache_size = static_cast(sizeof(versions) / sizeof(versions[0])); - - const int devId = getDevice(); - - int version; - - if (devId < cache_size && versions[devId] >= 0) - version = versions[devId]; - else - { - DeviceInfo dev(devId); - version = dev.majorVersion() * 10 + dev.minorVersion(); - if (devId < cache_size) - versions[devId] = version; - } +//////////////////////////////////////////////////////////////////////// +// DeviceInfo - return TargetArchs::builtWith(feature_set) && (version >= feature_set); -} +#ifdef HAVE_CUDA namespace { @@ -272,116 +307,613 @@ namespace { public: DeviceProps(); - ~DeviceProps(); - cudaDeviceProp* get(int devID); + const cudaDeviceProp* get(int devID) const; private: - std::vector props_; + std::vector props_; }; DeviceProps::DeviceProps() { - props_.resize(10, 0); - } + int count = getCudaEnabledDeviceCount(); - DeviceProps::~DeviceProps() - { - for (size_t i = 0; i < props_.size(); ++i) + if (count > 0) { - if (props_[i]) - delete props_[i]; + props_.resize(count); + + for (int devID = 0; devID < count; ++devID) + { + cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) ); + } } - props_.clear(); } - cudaDeviceProp* DeviceProps::get(int devID) + const cudaDeviceProp* DeviceProps::get(int devID) const { - if (devID >= (int) props_.size()) - props_.resize(devID + 5, 0); + CV_Assert( static_cast(devID) < props_.size() ); - if (!props_[devID]) - { - props_[devID] = new cudaDeviceProp; - cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) ); - } + return &props_[devID]; + } - return props_[devID]; + DeviceProps& deviceProps() + { + static DeviceProps props; + return props; } +} - DeviceProps deviceProps; +#endif + +const char* cv::gpu::DeviceInfo::name() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return ""; +#else + return deviceProps().get(device_id_)->name; +#endif +} + +size_t cv::gpu::DeviceInfo::totalGlobalMem() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->totalGlobalMem; +#endif } 
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { - return deviceProps.get(device_id_)->sharedMemPerBlock; +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->sharedMemPerBlock; +#endif +} + +int cv::gpu::DeviceInfo::regsPerBlock() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->regsPerBlock; +#endif } -bool cv::gpu::DeviceInfo::canMapHostMemory() const +int cv::gpu::DeviceInfo::warpSize() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->warpSize; +#endif +} + +size_t cv::gpu::DeviceInfo::memPitch() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->memPitch; +#endif +} + +int cv::gpu::DeviceInfo::maxThreadsPerBlock() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxThreadsPerBlock; +#endif +} + +Vec3i cv::gpu::DeviceInfo::maxThreadsDim() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec3i(); +#else + return Vec3i(deviceProps().get(device_id_)->maxThreadsDim); +#endif +} + +Vec3i cv::gpu::DeviceInfo::maxGridSize() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec3i(); +#else + return Vec3i(deviceProps().get(device_id_)->maxGridSize); +#endif +} + +int cv::gpu::DeviceInfo::clockRate() const { - return deviceProps.get(device_id_)->canMapHostMemory != 0; +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->clockRate; +#endif +} + +size_t cv::gpu::DeviceInfo::totalConstMem() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->totalConstMem; +#endif +} + +int cv::gpu::DeviceInfo::major() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->major; +#endif +} + +int cv::gpu::DeviceInfo::minor() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->minor; +#endif } size_t cv::gpu::DeviceInfo::textureAlignment() const { - return deviceProps.get(device_id_)->textureAlignment; +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->textureAlignment; +#endif } -void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const +size_t cv::gpu::DeviceInfo::texturePitchAlignment() const { - int prevDeviceID = getDevice(); - if (prevDeviceID != device_id_) - setDevice(device_id_); +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->texturePitchAlignment; +#endif +} - cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) ); +int cv::gpu::DeviceInfo::multiProcessorCount() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->multiProcessorCount; +#endif +} - if (prevDeviceID != device_id_) - setDevice(prevDeviceID); +bool cv::gpu::DeviceInfo::kernelExecTimeoutEnabled() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0; +#endif +} + +bool cv::gpu::DeviceInfo::integrated() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + return deviceProps().get(device_id_)->integrated != 0; +#endif +} + +bool cv::gpu::DeviceInfo::canMapHostMemory() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + return 
deviceProps().get(device_id_)->canMapHostMemory != 0; +#endif +} + +DeviceInfo::ComputeMode cv::gpu::DeviceInfo::computeMode() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return ComputeModeDefault; +#else + static const ComputeMode tbl[] = + { + ComputeModeDefault, + ComputeModeExclusive, + ComputeModeProhibited, + ComputeModeExclusiveProcess + }; + + return tbl[deviceProps().get(device_id_)->computeMode]; +#endif +} + +int cv::gpu::DeviceInfo::maxTexture1D() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxTexture1D; +#endif } -size_t cv::gpu::DeviceInfo::freeMemory() const +int cv::gpu::DeviceInfo::maxTexture1DMipmap() const { - size_t _totalMemory, _freeMemory; - queryMemory(_totalMemory, _freeMemory); - return _freeMemory; +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxTexture1DMipmap; +#endif } -size_t cv::gpu::DeviceInfo::totalMemory() const +int cv::gpu::DeviceInfo::maxTexture1DLinear() const { - size_t _totalMemory, _freeMemory; - queryMemory(_totalMemory, _freeMemory); - return _totalMemory; +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxTexture1DLinear; +#endif +} + +Vec2i cv::gpu::DeviceInfo::maxTexture2D() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxTexture2D); +#endif } -bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const +Vec2i cv::gpu::DeviceInfo::maxTexture2DMipmap() const { - int version = majorVersion() * 10 + minorVersion(); - return version >= feature_set; +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap); +#endif +} + +Vec3i cv::gpu::DeviceInfo::maxTexture2DLinear() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec3i(); +#else + return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear); +#endif +} + +Vec2i cv::gpu::DeviceInfo::maxTexture2DGather() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather); +#endif +} + +Vec3i cv::gpu::DeviceInfo::maxTexture3D() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec3i(); +#else + return Vec3i(deviceProps().get(device_id_)->maxTexture3D); +#endif +} + +int cv::gpu::DeviceInfo::maxTextureCubemap() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxTextureCubemap; +#endif +} + +Vec2i cv::gpu::DeviceInfo::maxTexture1DLayered() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered); +#endif +} + +Vec3i cv::gpu::DeviceInfo::maxTexture2DLayered() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec3i(); +#else + return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered); +#endif +} + +Vec2i cv::gpu::DeviceInfo::maxTextureCubemapLayered() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered); +#endif +} + +int cv::gpu::DeviceInfo::maxSurface1D() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxSurface1D; +#endif +} + +Vec2i cv::gpu::DeviceInfo::maxSurface2D() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxSurface2D); +#endif 
+} + +Vec3i cv::gpu::DeviceInfo::maxSurface3D() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec3i(); +#else + return Vec3i(deviceProps().get(device_id_)->maxSurface3D); +#endif +} + +Vec2i cv::gpu::DeviceInfo::maxSurface1DLayered() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered); +#endif +} + +Vec3i cv::gpu::DeviceInfo::maxSurface2DLayered() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec3i(); +#else + return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered); +#endif +} + +int cv::gpu::DeviceInfo::maxSurfaceCubemap() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxSurfaceCubemap; +#endif +} + +Vec2i cv::gpu::DeviceInfo::maxSurfaceCubemapLayered() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return Vec2i(); +#else + return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered); +#endif +} + +size_t cv::gpu::DeviceInfo::surfaceAlignment() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->surfaceAlignment; +#endif +} + +bool cv::gpu::DeviceInfo::concurrentKernels() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + return deviceProps().get(device_id_)->concurrentKernels != 0; +#endif +} + +bool cv::gpu::DeviceInfo::ECCEnabled() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + return deviceProps().get(device_id_)->ECCEnabled != 0; +#endif +} + +int cv::gpu::DeviceInfo::pciBusID() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->pciBusID; +#endif +} + +int cv::gpu::DeviceInfo::pciDeviceID() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->pciDeviceID; +#endif +} + +int cv::gpu::DeviceInfo::pciDomainID() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->pciDomainID; +#endif +} + +bool cv::gpu::DeviceInfo::tccDriver() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + return deviceProps().get(device_id_)->tccDriver != 0; +#endif +} + +int cv::gpu::DeviceInfo::asyncEngineCount() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->asyncEngineCount; +#endif +} + +bool cv::gpu::DeviceInfo::unifiedAddressing() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else + return deviceProps().get(device_id_)->unifiedAddressing != 0; +#endif +} + +int cv::gpu::DeviceInfo::memoryClockRate() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->memoryClockRate; +#endif +} + +int cv::gpu::DeviceInfo::memoryBusWidth() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->memoryBusWidth; +#endif +} + +int cv::gpu::DeviceInfo::l2CacheSize() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->l2CacheSize; +#endif +} + +int cv::gpu::DeviceInfo::maxThreadsPerMultiProcessor() const +{ +#ifndef HAVE_CUDA + throw_no_cuda(); + return 0; +#else + return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor; +#endif +} + +void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const +{ +#ifndef HAVE_CUDA + (void) _totalMemory; + (void) _freeMemory; + throw_no_cuda(); +#else + int prevDeviceID 
= getDevice(); + if (prevDeviceID != device_id_) + setDevice(device_id_); + + cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) ); + + if (prevDeviceID != device_id_) + setDevice(prevDeviceID); +#endif } bool cv::gpu::DeviceInfo::isCompatible() const { +#ifndef HAVE_CUDA + throw_no_cuda(); + return false; +#else // Check PTX compatibility - if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion())) + if (TargetArchs::hasEqualOrLessPtx(major(), minor())) return true; // Check BIN compatibility - for (int i = minorVersion(); i >= 0; --i) - if (TargetArchs::hasBin(majorVersion(), i)) + for (int i = minor(); i >= 0; --i) + if (TargetArchs::hasBin(major(), i)) return true; return false; +#endif } -void cv::gpu::DeviceInfo::query() -{ - const cudaDeviceProp* prop = deviceProps.get(device_id_); +//////////////////////////////////////////////////////////////////////// +// print info - name_ = prop->name; - multi_processor_count_ = prop->multiProcessorCount; - majorVersion_ = prop->major; - minorVersion_ = prop->minor; -} +#ifdef HAVE_CUDA namespace { @@ -407,8 +939,14 @@ namespace } } +#endif + void cv::gpu::printCudaDeviceInfo(int device) { +#ifndef HAVE_CUDA + (void) device; + throw_no_cuda(); +#else int count = getCudaEnabledDeviceCount(); bool valid = (device >= 0) && (device < count); @@ -484,11 +1022,17 @@ void cv::gpu::printCudaDeviceInfo(int device) printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100); printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100); printf(", NumDevs = %d\n\n", count); + fflush(stdout); +#endif } void cv::gpu::printShortCudaDeviceInfo(int device) { +#ifndef HAVE_CUDA + (void) device; + throw_no_cuda(); +#else int count = getCudaEnabledDeviceCount(); bool valid = (device >= 0) && (device < count); @@ -514,11 +1058,11 @@ void cv::gpu::printShortCudaDeviceInfo(int device) printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100); } + fflush(stdout); +#endif } -#endif // HAVE_CUDA - //////////////////////////////////////////////////////////////////////// // Error handling diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 26442f5..d40293d 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -878,7 +878,7 @@ namespace virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) { DeviceInfo devInfo; - int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion(); + int cc = devInfo.major() * 10 + devInfo.minor(); func(src, dst, kernel.ptr(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s)); } @@ -977,7 +977,7 @@ namespace virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) { DeviceInfo devInfo; - int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion(); + int cc = devInfo.major() * 10 + devInfo.minor(); if (ksize > 16 && cc < 20) CV_Error(cv::Error::StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 for device with compute capabilities less than 2.0"); diff --git a/modules/gpuoptflow/test/test_optflow.cpp b/modules/gpuoptflow/test/test_optflow.cpp index fce0755..c20260e 100644 --- a/modules/gpuoptflow/test/test_optflow.cpp +++ b/modules/gpuoptflow/test/test_optflow.cpp @@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression) brox(loadMat(frame0), loadMat(frame1), u, v); std::string fname(cvtest::TS::ptr()->get_data_path()); - if 
(devInfo.majorVersion() >= 2) + if (devInfo.major() >= 2) fname += "opticalflow/brox_optical_flow_cc20.bin"; else fname += "opticalflow/brox_optical_flow.bin"; diff --git a/modules/gpustereo/src/stereobm.cpp b/modules/gpustereo/src/stereobm.cpp index 47d17dc..f8e6c20 100644 --- a/modules/gpustereo/src/stereobm.cpp +++ b/modules/gpustereo/src/stereobm.cpp @@ -91,7 +91,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() DeviceInfo device_info; - if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16) + if (device_info.major() > 1 || device_info.multiProcessorCount() > 16) return true; return false; diff --git a/modules/ts/src/gpu_perf.cpp b/modules/ts/src/gpu_perf.cpp index f6d3bb3..dca1814 100644 --- a/modules/ts/src/gpu_perf.cpp +++ b/modules/ts/src/gpu_perf.cpp @@ -287,8 +287,8 @@ namespace perf cv::gpu::DeviceInfo info(i); printf("[----------]\n"), fflush(stdout); - printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout); - printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout); + printf("[ DEVICE ] \t# %d %s.\n", i, info.name()), fflush(stdout); + printf("[ ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout); printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout); printf("[ ] \tTotal memory: %d Mb\n", static_cast(static_cast(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout); printf("[ ] \tFree memory: %d Mb\n", static_cast(static_cast(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout); diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index caaeda4..cd5a239 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -682,13 +682,13 @@ void TestBase::Init(int argc, const char* const argv[]) cv::gpu::DeviceInfo info(param_cuda_device); if (!info.isCompatible()) { - printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name().c_str()), fflush(stdout); + printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout); exit(-1); } cv::gpu::setDevice(param_cuda_device); - printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name().c_str()), fflush(stdout); + printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout); } #endif diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index 9119cfa..046167f 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -82,8 +82,8 @@ int main() if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.majorVersion() - << dev_info.minorVersion() << "\n"; + << dev_info.name() << ", CC " << dev_info.major() + << dev_info.minor() << "\n"; return -1; } } diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp index d3dbf58..de9ac8d 100644 --- a/samples/gpu/driver_api_stereo_multi.cpp +++ b/samples/gpu/driver_api_stereo_multi.cpp @@ -112,8 +112,8 @@ int main(int argc, char** argv) if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.majorVersion() - << dev_info.minorVersion() << "\n"; + << dev_info.name() << ", CC " << dev_info.major() + << dev_info.minor() << "\n"; 
return -1; } } diff --git a/samples/gpu/multi.cpp b/samples/gpu/multi.cpp index 180388f..34b1118 100644 --- a/samples/gpu/multi.cpp +++ b/samples/gpu/multi.cpp @@ -62,8 +62,8 @@ int main() if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.majorVersion() - << dev_info.minorVersion() << "\n"; + << dev_info.name() << ", CC " << dev_info.major() + << dev_info.minor() << "\n"; return -1; } } diff --git a/samples/gpu/performance/performance.cpp b/samples/gpu/performance/performance.cpp index 8af0b3d..42fd978 100644 --- a/samples/gpu/performance/performance.cpp +++ b/samples/gpu/performance/performance.cpp @@ -191,7 +191,7 @@ int main(int argc, const char* argv[]) DeviceInfo dev_info(device); if (!dev_info.isCompatible()) { - cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl; + cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl; return -1; } setDevice(device); diff --git a/samples/gpu/stereo_multi.cpp b/samples/gpu/stereo_multi.cpp index 2a3dec3..f85efe1 100644 --- a/samples/gpu/stereo_multi.cpp +++ b/samples/gpu/stereo_multi.cpp @@ -81,8 +81,8 @@ int main(int argc, char** argv) if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.majorVersion() - << dev_info.minorVersion() << "\n"; + << dev_info.name() << ", CC " << dev_info.major() + << dev_info.minor() << "\n"; return -1; } } -- 2.7.4
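
The refactored query API introduced above can be exercised with a short stand-alone program. The sketch below is illustrative rather than part of the patch: it uses only functions declared in gpu.hpp by this change (getCudaEnabledDeviceCount, setDevice, DeviceInfo and its accessors); the include path and the printf formatting are assumptions. Note that major()/minor() replace the old majorVersion()/minorVersion(), and name() now returns const char* rather than cv::String.

    #include <cstdio>
    #include "opencv2/core/gpu.hpp"

    int main()
    {
        using namespace cv::gpu;

        // 0 means the library was built without CUDA, -1 means the driver is insufficient.
        const int count = getCudaEnabledDeviceCount();
        if (count <= 0)
        {
            std::printf("No CUDA-enabled devices available (count = %d)\n", count);
            return 0;
        }

        for (int id = 0; id < count; ++id)
        {
            DeviceInfo info(id); // accessors read the cudaDeviceProp cached by DeviceProps

            std::printf("#%d %s, CC %d.%d, %d multiprocessors, %zu bytes of global memory\n",
                        info.deviceID(), info.name(), info.major(), info.minor(),
                        info.multiProcessorCount(), info.totalGlobalMem());

            // Skip devices this OpenCV build carries no compatible PTX/BIN code for.
            if (!info.isCompatible())
                continue;

            setDevice(id);

            size_t totalMem = 0, freeMem = 0;
            info.queryMemory(totalMem, freeMem); // (total, free), as declared in gpu.hpp
            std::printf("    free memory: %zu of %zu bytes\n", freeMem, totalMem);
        }

        return 0;
    }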
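
TargetArchs describes what this OpenCV GPU module binary was compiled for, DeviceInfo::supports() compares the device's compute capability against a FeatureSet value, and deviceSupports() (see gpu_info.cpp above) requires both and caches the device version. A minimal sketch of the three checks, assuming the same include path as above and using the FEATURE_SET_COMPUTE_35 / DYNAMIC_PARALLELISM enumerators visible in the FeatureSet enum:

    #include <cstdio>
    #include "opencv2/core/gpu.hpp"

    int main()
    {
        using namespace cv::gpu;

        if (getCudaEnabledDeviceCount() <= 0)
            return 0;

        DeviceInfo info; // the current device, i.e. getDevice()

        // Build-time check: was code for compute capability 3.5 compiled into this binary?
        const bool builtFor35 = TargetArchs::builtWith(FEATURE_SET_COMPUTE_35);

        // Run-time check: is the device's compute capability at least 3.5?
        const bool deviceIs35 = info.supports(FEATURE_SET_COMPUTE_35);

        // deviceSupports() is the conjunction of the two checks.
        const bool canUseDynamicParallelism = deviceSupports(DYNAMIC_PARALLELISM);

        std::printf("built for 3.5: %d, device is >= 3.5: %d, dynamic parallelism usable: %d\n",
                    (int)builtFor35, (int)deviceIs35, (int)canUseDynamicParallelism);
        return 0;
    }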