using namespace cv;\r
using namespace cv::gpu;\r
\r
-\r
-void cuSafeCall(int code);\r
struct Worker { void operator()(int device_id) const; };\r
-void destroy();\r
+void destroyContexts();\r
+\r
+// Checks the result code of a CUDA driver API call.\r
+// On any value other than CUDA_SUCCESS it prints the code together with the\r
+// file and line of the call site, destroys the GPU contexts and exits with -1.\r
+// The argument is evaluated exactly once, so a failing call is not issued a\r
+// second time while the message is printed; the do/while(0) wrapper makes the\r
+// macro a single statement that is safe inside an unbraced if/else.\r
+#define cuSafeCall(code) do { \\r
+    const int cu_safe_call_status_ = (code); \\r
+    if (cu_safe_call_status_ != CUDA_SUCCESS) { \\r
+        cout << "CUDA driver API error: code " << cu_safe_call_status_ \\r
+            << ", file " << __FILE__ << ", line " << __LINE__ << endl; \\r
+        destroyContexts(); \\r
+        exit(-1); \\r
+    } \\r
+} while (0)\r
\r
\r
// Each GPU is associated with its own context\r
CUcontext contexts[2];\r
\r
-// Auxiliary variable, stores previusly used context\r
-CUcontext prev_context;\r
-\r
\r
int main()\r
{\r
return -1;\r
}\r
\r
- // Save the default context\r
- cuSafeCall(cuCtxAttach(&contexts[0], 0));\r
- cuSafeCall(cuCtxDetach(contexts[0]));\r
+ // Initialize the driver API before creating any context\r
+ cuSafeCall(cuInit(0));\r
\r
- // Create new context for the second GPU\r
+ // Create context for the first GPU\r
CUdevice device;\r
+ cuSafeCall(cuDeviceGet(&device, 0));\r
+ cuSafeCall(cuCtxCreate(&contexts[0], 0, device));\r
+\r
+ // Pop the context off this thread; the worker pushes it again later.\r
+ // The result is checked: a failed pop would leave the context bound here.\r
+ CUcontext prev_context;\r
+ cuSafeCall(cuCtxPopCurrent(&prev_context));\r
+\r
+ // Create context for the second GPU, using the same flags (0) as the first.\r
+ // The second argument is the flags word, not the device ordinal.\r
cuSafeCall(cuDeviceGet(&device, 1));\r
cuSafeCall(cuCtxCreate(&contexts[1], 0, device));\r
\r
- // Restore the first GPU context\r
+ // Pop the second context as well so that no context stays current here\r
cuSafeCall(cuCtxPopCurrent(&prev_context));\r
\r
- // Run \r
+ // Execute calculation in two threads using two GPUs\r
int devices[] = {0, 1};\r
parallel_do(devices, devices + 2, Worker());\r
\r
- // Destroy context of the second GPU\r
- destroy();\r
-\r
+ // Release both GPU contexts before a normal exit\r
+ destroyContexts();\r
return 0;\r
}\r
\r
\r
void Worker::operator()(int device_id) const\r
{\r
- cout << device_id << endl;\r
-}\r
+ // Make the context of the requested device current on this thread.\r
+ // The result is checked so the GPU work below never runs when the push failed.\r
+ cuSafeCall(cuCtxPushCurrent(contexts[device_id]));\r
\r
+ // Generate random matrix (1000x1000, CV_32F, uniform in [0, 1), fixed seed 0)\r
+ Mat src(1000, 1000, CV_32F);\r
+ RNG rng(0);\r
+ rng.fill(src, RNG::UNIFORM, 0, 1);\r
\r
-void cuSafeCall(int code)\r
-{\r
- if (code != CUDA_SUCCESS) \r
- {\r
- cout << "CUDA driver API error: code " << code \r
- << ", file " << __FILE__ \r
- << ", line " << __LINE__ << endl;\r
- destroy();\r
- exit(-1);\r
- }\r
+ // Upload data to the GPU\r
+ GpuMat d_src(src);\r
+ GpuMat d_dst;\r
+\r
+ transpose(d_src, d_dst);\r
+\r
+ // Deallocate here, otherwise deallocation will be performed \r
+ // after context is extracted from the stack\r
+ d_src.release();\r
+ d_dst.release();\r
+\r
+ // Pop the context pushed at the top of this function; checked like the push\r
+ CUcontext prev_context;\r
+ cuSafeCall(cuCtxPopCurrent(&prev_context));\r
+\r
+ cout << "Device " << device_id << " finished\n";\r
}\r
\r
\r
-void destroy() \r
+// Destroys every GPU context stored in the global `contexts` array.\r
+// cuSafeCall invokes this function on failure, possibly before a context has\r
+// been created, so null handles are skipped and each handle is cleared after\r
+// destruction. cuSafeCall is deliberately not used here because it calls\r
+// destroyContexts() itself and would recurse on a failing destroy.\r
+void destroyContexts()\r
{\r
- cuCtxDestroy(contexts[1]);\r
+    for (int i = 0; i < 2; ++i)\r
+    {\r
+        if (contexts[i])\r
+        {\r
+            cuCtxDestroy(contexts[i]);\r
+            contexts[i] = 0;\r
+        }\r
+    }\r
}\r
\r