samples/gpu/driver_api_multi.cpp

   1 /* This sample demonstrates how to perform independent tasks
   2    on different GPUs */
   3
   4 // Disable some warnings which are caused by CUDA headers
   5 #if defined(_MSC_VER)
   6 #pragma warning(disable: 4201 4408 4100)
   7 #endif
   8
   9 #include <iostream>
  10 #include "cvconfig.h"
  11 #include "opencv2/core/core.hpp"
  12 #include "opencv2/cudaarithm.hpp"
  13
  14 #ifdef HAVE_TBB
  15 #  include "tbb/tbb_stddef.h"
  16 #  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
  17 #    include "tbb/tbb.h"
  18 #    include "tbb/task.h"
  19 #    undef min
  20 #    undef max
  21 #  else
  22 #    undef HAVE_TBB
  23 #  endif
  24 #endif
  25
  26 #if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
  27
  28 int main()
  29 {
  30 #if !defined(HAVE_CUDA)
  31     std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
  32 #endif
  33
  34 #if !defined(HAVE_TBB)
  35     std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
  36 #endif
  37
  38 #if defined(__arm__)
  39     std::cout << "Unsupported for ARM CUDA library." << std::endl;
  40 #endif
  41
  42     return 0;
  43 }
  44
  45 #else
  46
  47 #include <cuda.h>
  48 #include <cuda_runtime.h>
  49
  50 using namespace std;
  51 using namespace cv;
  52 using namespace cv::cuda;
  53
  54 struct Worker { void operator()(int device_id) const; };
  55 void destroyContexts();
  56
  57 #define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)
  58 inline void safeCall_(int code, const char* expr, const char* file, int line)
  59 {
  60     if (code != CUDA_SUCCESS)
  61     {
  62         std::cout << "CUDA driver API error: code " << code << ", expr " << expr
  63             << ", file " << file << ", line " << line << endl;
  64         destroyContexts();
  65         exit(-1);
  66     }
  67 }
  68
  69 // Each GPU is associated with its own context
  70 CUcontext contexts[2];
  71
  72 int main()
  73 {
  74     int num_devices = getCudaEnabledDeviceCount();
  75     if (num_devices < 2)
  76     {
  77         std::cout << "Two or more GPUs are required\n";
  78         return -1;
  79     }
  80
  81     for (int i = 0; i < num_devices; ++i)
  82     {
  83         cv::cuda::printShortCudaDeviceInfo(i);
  84
  85         DeviceInfo dev_info(i);
  86         if (!dev_info.isCompatible())
  87         {
  88             std::cout << "CUDA module isn't built for GPU #" << i << " ("
  89                  << dev_info.name() << ", CC " << dev_info.majorVersion()
  90                  << dev_info.minorVersion() << "\n";
  91             return -1;
  92         }
  93     }
  94
  95     // Init CUDA Driver API
  96     safeCall(cuInit(0));
  97
  98     // Create context for GPU #0
  99     CUdevice device;
 100     safeCall(cuDeviceGet(&device, 0));
 101     safeCall(cuCtxCreate(&contexts[0], 0, device));
 102
 103     CUcontext prev_context;
 104     safeCall(cuCtxPopCurrent(&prev_context));
 105
 106     // Create context for GPU #1
 107     safeCall(cuDeviceGet(&device, 1));
 108     safeCall(cuCtxCreate(&contexts[1], 0, device));
 109
 110     safeCall(cuCtxPopCurrent(&prev_context));
 111
 112     // Execute calculation in two threads using two GPUs
 113     int devices[] = {0, 1};
 114     tbb::parallel_do(devices, devices + 2, Worker());
 115
 116     destroyContexts();
 117     return 0;
 118 }
 119
 120
 121 void Worker::operator()(int device_id) const
 122 {
 123     // Set the proper context
 124     safeCall(cuCtxPushCurrent(contexts[device_id]));
 125
 126     Mat src(1000, 1000, CV_32F);
 127     Mat dst;
 128
 129     RNG rng(0);
 130     rng.fill(src, RNG::UNIFORM, 0, 1);
 131
 132     // CPU works
 133     cv::transpose(src, dst);
 134
 135     // GPU works
 136     GpuMat d_src(src);
 137     GpuMat d_dst;
 138     cuda::transpose(d_src, d_dst);
 139
 140     // Check results
 141     bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
 142     std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
 143         << (passed ? "passed" : "FAILED") << endl;
 144
 145     // Deallocate data here, otherwise deallocation will be performed
 146     // after context is extracted from the stack
 147     d_src.release();
 148     d_dst.release();
 149
 150     CUcontext prev_context;
 151     safeCall(cuCtxPopCurrent(&prev_context));
 152 }
 153
 154
 155 void destroyContexts()
 156 {
 157     safeCall(cuCtxDestroy(contexts[0]));
 158     safeCall(cuCtxDestroy(contexts[1]));
 159 }
 160
 161 #endif