+++ /dev/null
-// Disable some warnings which are caused by CUDA headers\r
-#pragma warning(disable: 4201 4408 4100)\r
-\r
-#include <iostream>\r
-#include <cvconfig.h>\r
-#include "opencv2/core/core.hpp"\r
-#include <opencv2/gpu/gpu.hpp>\r
-\r
-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)\r
-\r
-// Fallback entry point for builds without CUDA and/or TBB: reports every\r
-// missing prerequisite and returns 0 without touching any GPU API.\r
-int main()\r
-{\r
-    // std:: is spelled out because no using-directive is in effect in this\r
-    // branch of the #if (the using-directives live in the #else branch).\r
-#if !defined(HAVE_CUDA)\r
-    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";\r
-#endif\r
-\r
-#if !defined(HAVE_TBB)\r
-    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";\r
-#endif\r
-\r
-    return 0;\r
-}\r
-\r
-#else\r
-\r
-#include <cuda.h>\r
-#include <cuda_runtime.h>\r
-#include "opencv2/core/internal.hpp" // For TBB wrappers\r
-\r
-using namespace std;\r
-using namespace cv;\r
-using namespace cv::gpu;\r
-\r
-// Functor invoked once per GPU; device_id selects which entry of contexts[]\r
-// the call works with.\r
-struct Worker\r
-{\r
-    void operator()(int device_id) const;\r
-};\r
-\r
-// Destroys the per-GPU driver contexts stored in contexts[].\r
-void destroyContexts();\r
-\r
-#define cuSafeCall(code) if (code != CUDA_SUCCESS) { \\r
- cout << "CUDA driver API error: code " << code \\r
- << ", file " << __FILE__ << ", line " << __LINE__ << endl; \\r
- destroyContexts(); \\r
- exit(-1); \\r
-}\r
-\r
-\r
-// Each GPU is associated with its own driver API context.\r
-// The array is indexed by device ordinal: main() creates contexts[0] for\r
-// device 0 and contexts[1] for device 1, Worker::operator() pushes\r
-// contexts[device_id] onto the calling thread, and destroyContexts()\r
-// destroys both entries.\r
-CUcontext contexts[2];\r
-\r
-\r
-// Entry point of the multi-GPU sample.\r
-// Requires at least two CUDA-enabled devices; creates one driver API context\r
-// per device, runs Worker on both devices through parallel_do, then destroys\r
-// the contexts.\r
-// Returns 0 on success and -1 when fewer than two GPUs are available; any\r
-// driver API failure terminates the process through cuSafeCall.\r
-int main()\r
-{\r
-    if (getCudaEnabledDeviceCount() < 2)\r
-    {\r
-        cout << "Two or more GPUs are required\n";\r
-        return -1;\r
-    }\r
-\r
-    cuSafeCall(cuInit(0));\r
-\r
-    // Create one context per GPU. Both contexts use the same creation flags\r
-    // (0); the second argument of cuCtxCreate is a flags word, not a device\r
-    // ordinal.\r
-    for (int ordinal = 0; ordinal < 2; ++ordinal)\r
-    {\r
-        CUdevice device;\r
-        cuSafeCall(cuDeviceGet(&device, ordinal));\r
-        cuSafeCall(cuCtxCreate(&contexts[ordinal], 0, device));\r
-\r
-        // Pop the freshly created context off this thread again, so that a\r
-        // worker can push it later.\r
-        CUcontext popped_context;\r
-        cuSafeCall(cuCtxPopCurrent(&popped_context));\r
-    }\r
-\r
-    // Execute calculation in two threads using two GPUs\r
-    int devices[] = {0, 1};\r
-    parallel_do(devices, devices + 2, Worker());\r
-\r
-    destroyContexts();\r
-    return 0;\r
-}\r
-\r
-\r
-// Job executed for one GPU: pushes contexts[device_id] onto the calling\r
-// thread, transposes the same random matrix on the CPU and on the GPU,\r
-// prints whether the two results agree, and pops the context again.\r
-void Worker::operator()(int device_id) const\r
-{\r
-    // Make the context of the requested GPU current for this thread\r
-    cuSafeCall(cuCtxPushCurrent(contexts[device_id]));\r
-\r
-    // Host input: 1000x1000 CV_32F matrix filled from a generator seeded with 0\r
-    Mat h_input(1000, 1000, CV_32F);\r
-    RNG generator(0);\r
-    generator.fill(h_input, RNG::UNIFORM, 0, 1);\r
-\r
-    // Reference result computed on the CPU\r
-    Mat h_expected;\r
-    transpose(h_input, h_expected);\r
-\r
-    // Same operation on the GPU\r
-    GpuMat d_input(h_input);\r
-    GpuMat d_output;\r
-    transpose(d_input, d_output);\r
-\r
-    // Compare: infinity norm of the difference between CPU and GPU results\r
-    const double max_abs_diff = norm(h_expected - Mat(d_output), NORM_INF);\r
-    const char* verdict = (max_abs_diff < 1e-3) ? "passed" : "FAILED";\r
-    cout << "GPU #" << device_id << ": " << verdict << endl;\r
-\r
-    // Release the device matrices explicitly here; otherwise deallocation\r
-    // would be performed after the context has been popped from the stack\r
-    d_input.release();\r
-    d_output.release();\r
-\r
-    CUcontext popped_context;\r
-    cuSafeCall(cuCtxPopCurrent(&popped_context));\r
-}\r
-\r
-\r
-void destroyContexts()\r
-{\r
- cuSafeCall(cuCtxDestroy(contexts[0]));\r
- cuSafeCall(cuCtxDestroy(contexts[1]));\r
-}\r
-\r
-#endif
\ No newline at end of file