src/caffe/common.cpp

   1 #include <boost/thread.hpp>
   2 #include <glog/logging.h>
   3 #include <cmath>
   4 #include <cstdio>
   5 #include <ctime>
   6 #include <exception>
   7
   8 #include "caffe/common.hpp"
   9 #include "caffe/util/rng.hpp"
  10
  11 namespace caffe {
  12
  13 // Make sure each thread can have different values.
  14 static boost::thread_specific_ptr<Caffe> thread_instance_;
  15
  16 Caffe& Caffe::Get() {
  17   if (!thread_instance_.get()) {
  18     thread_instance_.reset(new Caffe());
  19   }
  20   return *(thread_instance_.get());
  21 }
  22
  23 // random seeding
  24 int64_t cluster_seedgen(void) {
  25   int64_t s, seed, pid;
  26   FILE* f = fopen("/dev/urandom", "rb");
  27   if (f && fread(&seed, 1, sizeof(seed), f) == sizeof(seed)) {
  28     fclose(f);
  29     return seed;
  30   }
  31
  32   LOG(INFO) << "System entropy source not available, "
  33               "using fallback algorithm to generate seed instead.";
  34   if (f)
  35     fclose(f);
  36
  37   pid = getpid();
  38   s = time(NULL);
  39   seed = std::abs(((s * 181) * ((pid - 83) * 359)) % 104729);
  40   return seed;
  41 }
  42
  43
  44 void GlobalInit(int* pargc, char*** pargv) {
  45   // Google flags.
  46   ::gflags::ParseCommandLineFlags(pargc, pargv, true);
  47   // Google logging.
  48   ::google::InitGoogleLogging(*(pargv)[0]);
  49   // Provide a backtrace on segfault.
  50   ::google::InstallFailureSignalHandler();
  51 }
  52
  53 #ifdef CPU_ONLY  // CPU-only Caffe.
  54
  55 Caffe::Caffe()
  56     : random_generator_(), mode_(Caffe::CPU),use_mali_gpu_(false),
  57       solver_count_(1), solver_rank_(0), multiprocess_(false)
  58    {
  59 }
  60
  61 Caffe::~Caffe() { }
  62
  63 void Caffe::set_random_seed(const unsigned int seed) {
  64   // RNG seed
  65   Get().random_generator_.reset(new RNG(seed));
  66 }
  67
  68 void Caffe::SetDevice(const int device_id) {
  69   NO_GPU;
  70 }
  71
  72 void Caffe::DeviceQuery() {
  73   NO_GPU;
  74 }
  75
  76 bool Caffe::CheckDevice(const int device_id) {
  77   NO_GPU;
  78   return false;
  79 }
  80
  81 int Caffe::FindDevice(const int start_id) {
  82   NO_GPU;
  83   return -1;
  84 }
  85
  86 class Caffe::RNG::Generator {
  87  public:
  88   Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
  89   explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
  90   caffe::rng_t* rng() { return rng_.get(); }
  91  private:
  92   shared_ptr<caffe::rng_t> rng_;
  93 };
  94
  95 Caffe::RNG::RNG() : generator_(new Generator()) { }
  96
  97 Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }
  98
  99 Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
 100   generator_ = other.generator_;
 101   return *this;
 102 }
 103
 104 void* Caffe::RNG::generator() {
 105   return static_cast<void*>(generator_->rng());
 106 }
 107
 108 #else  // Normal GPU + CPU Caffe.
 109
 110 Caffe::Caffe()
 111     : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(),
 112     mode_(Caffe::CPU),
 113     solver_count_(1), solver_rank_(0), multiprocess_(false) {
 114   // Try to create a cublas handler, and report an error if failed (but we will
 115   // keep the program running as one might just want to run CPU code).
 116   if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
 117     LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
 118   }
 119   // Try to create a curand handler.
 120   if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
 121       != CURAND_STATUS_SUCCESS ||
 122       curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
 123       != CURAND_STATUS_SUCCESS) {
 124     LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
 125   }
 126 }
 127
 128 Caffe::~Caffe() {
 129   if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
 130   if (curand_generator_) {
 131     CURAND_CHECK(curandDestroyGenerator(curand_generator_));
 132   }
 133 }
 134
 135 void Caffe::set_random_seed(const unsigned int seed) {
 136   // Curand seed
 137   static bool g_curand_availability_logged = false;
 138   if (Get().curand_generator_) {
 139     CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
 140         seed));
 141     CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0));
 142   } else {
 143     if (!g_curand_availability_logged) {
 144         LOG(ERROR) <<
 145             "Curand not available. Skipping setting the curand seed.";
 146         g_curand_availability_logged = true;
 147     }
 148   }
 149   // RNG seed
 150   Get().random_generator_.reset(new RNG(seed));
 151 }
 152
 153 void Caffe::SetDevice(const int device_id) {
 154   int current_device;
 155   CUDA_CHECK(cudaGetDevice(&current_device));
 156   if (current_device == device_id) {
 157     return;
 158   }
 159   // The call to cudaSetDevice must come before any calls to Get, which
 160   // may perform initialization using the GPU.
 161   CUDA_CHECK(cudaSetDevice(device_id));
 162   if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
 163   if (Get().curand_generator_) {
 164     CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
 165   }
 166   CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
 167   CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
 168       CURAND_RNG_PSEUDO_DEFAULT));
 169   CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
 170       cluster_seedgen()));
 171 }
 172
 173 void Caffe::DeviceQuery() {
 174   cudaDeviceProp prop;
 175   int device;
 176   if (cudaSuccess != cudaGetDevice(&device)) {
 177     printf("No cuda device present.\n");
 178     return;
 179   }
 180   CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
 181   LOG(INFO) << "Device id:                     " << device;
 182   LOG(INFO) << "Major revision number:         " << prop.major;
 183   LOG(INFO) << "Minor revision number:         " << prop.minor;
 184   LOG(INFO) << "Name:                          " << prop.name;
 185   LOG(INFO) << "Total global memory:           " << prop.totalGlobalMem;
 186   LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
 187   LOG(INFO) << "Total registers per block:     " << prop.regsPerBlock;
 188   LOG(INFO) << "Warp size:                     " << prop.warpSize;
 189   LOG(INFO) << "Maximum memory pitch:          " << prop.memPitch;
 190   LOG(INFO) << "Maximum threads per block:     " << prop.maxThreadsPerBlock;
 191   LOG(INFO) << "Maximum dimension of block:    "
 192       << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
 193       << prop.maxThreadsDim[2];
 194   LOG(INFO) << "Maximum dimension of grid:     "
 195       << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
 196       << prop.maxGridSize[2];
 197   LOG(INFO) << "Clock rate:                    " << prop.clockRate;
 198   LOG(INFO) << "Total constant memory:         " << prop.totalConstMem;
 199   LOG(INFO) << "Texture alignment:             " << prop.textureAlignment;
 200   LOG(INFO) << "Concurrent copy and execution: "
 201       << (prop.deviceOverlap ? "Yes" : "No");
 202   LOG(INFO) << "Number of multiprocessors:     " << prop.multiProcessorCount;
 203   LOG(INFO) << "Kernel execution timeout:      "
 204       << (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
 205   return;
 206 }
 207
 208 bool Caffe::CheckDevice(const int device_id) {
 209   // This function checks the availability of GPU #device_id.
 210   // It attempts to create a context on the device by calling cudaFree(0).
 211   // cudaSetDevice() alone is not sufficient to check the availability.
 212   // It lazily records device_id, however, does not initialize a
 213   // context. So it does not know if the host thread has the permission to use
 214   // the device or not.
 215   //
 216   // In a shared environment where the devices are set to EXCLUSIVE_PROCESS
 217   // or EXCLUSIVE_THREAD mode, cudaSetDevice() returns cudaSuccess
 218   // even if the device is exclusively occupied by another process or thread.
 219   // Cuda operations that initialize the context are needed to check
 220   // the permission. cudaFree(0) is one of those with no side effect,
 221   // except the context initialization.
 222   bool r = ((cudaSuccess == cudaSetDevice(device_id)) &&
 223             (cudaSuccess == cudaFree(0)));
 224   // reset any error that may have occurred.
 225   cudaGetLastError();
 226   return r;
 227 }
 228
 229 int Caffe::FindDevice(const int start_id) {
 230   // This function finds the first available device by checking devices with
 231   // ordinal from start_id to the highest available value. In the
 232   // EXCLUSIVE_PROCESS or EXCLUSIVE_THREAD mode, if it succeeds, it also
 233   // claims the device due to the initialization of the context.
 234   int count = 0;
 235   CUDA_CHECK(cudaGetDeviceCount(&count));
 236   for (int i = start_id; i < count; i++) {
 237     if (CheckDevice(i)) return i;
 238   }
 239   return -1;
 240 }
 241
 242 class Caffe::RNG::Generator {
 243  public:
 244   Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
 245   explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
 246   caffe::rng_t* rng() { return rng_.get(); }
 247  private:
 248   shared_ptr<caffe::rng_t> rng_;
 249 };
 250
 251 Caffe::RNG::RNG() : generator_(new Generator()) { }
 252
 253 Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }
 254
 255 Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
 256   generator_.reset(other.generator_.get());
 257   return *this;
 258 }
 259
 260 void* Caffe::RNG::generator() {
 261   return static_cast<void*>(generator_->rng());
 262 }
 263
 264 const char* cublasGetErrorString(cublasStatus_t error) {
 265   switch (error) {
 266   case CUBLAS_STATUS_SUCCESS:
 267     return "CUBLAS_STATUS_SUCCESS";
 268   case CUBLAS_STATUS_NOT_INITIALIZED:
 269     return "CUBLAS_STATUS_NOT_INITIALIZED";
 270   case CUBLAS_STATUS_ALLOC_FAILED:
 271     return "CUBLAS_STATUS_ALLOC_FAILED";
 272   case CUBLAS_STATUS_INVALID_VALUE:
 273     return "CUBLAS_STATUS_INVALID_VALUE";
 274   case CUBLAS_STATUS_ARCH_MISMATCH:
 275     return "CUBLAS_STATUS_ARCH_MISMATCH";
 276   case CUBLAS_STATUS_MAPPING_ERROR:
 277     return "CUBLAS_STATUS_MAPPING_ERROR";
 278   case CUBLAS_STATUS_EXECUTION_FAILED:
 279     return "CUBLAS_STATUS_EXECUTION_FAILED";
 280   case CUBLAS_STATUS_INTERNAL_ERROR:
 281     return "CUBLAS_STATUS_INTERNAL_ERROR";
 282 #if CUDA_VERSION >= 6000
 283   case CUBLAS_STATUS_NOT_SUPPORTED:
 284     return "CUBLAS_STATUS_NOT_SUPPORTED";
 285 #endif
 286 #if CUDA_VERSION >= 6050
 287   case CUBLAS_STATUS_LICENSE_ERROR:
 288     return "CUBLAS_STATUS_LICENSE_ERROR";
 289 #endif
 290   }
 291   return "Unknown cublas status";
 292 }
 293
 294 const char* curandGetErrorString(curandStatus_t error) {
 295   switch (error) {
 296   case CURAND_STATUS_SUCCESS:
 297     return "CURAND_STATUS_SUCCESS";
 298   case CURAND_STATUS_VERSION_MISMATCH:
 299     return "CURAND_STATUS_VERSION_MISMATCH";
 300   case CURAND_STATUS_NOT_INITIALIZED:
 301     return "CURAND_STATUS_NOT_INITIALIZED";
 302   case CURAND_STATUS_ALLOCATION_FAILED:
 303     return "CURAND_STATUS_ALLOCATION_FAILED";
 304   case CURAND_STATUS_TYPE_ERROR:
 305     return "CURAND_STATUS_TYPE_ERROR";
 306   case CURAND_STATUS_OUT_OF_RANGE:
 307     return "CURAND_STATUS_OUT_OF_RANGE";
 308   case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
 309     return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
 310   case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
 311     return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
 312   case CURAND_STATUS_LAUNCH_FAILURE:
 313     return "CURAND_STATUS_LAUNCH_FAILURE";
 314   case CURAND_STATUS_PREEXISTING_FAILURE:
 315     return "CURAND_STATUS_PREEXISTING_FAILURE";
 316   case CURAND_STATUS_INITIALIZATION_FAILED:
 317     return "CURAND_STATUS_INITIALIZATION_FAILED";
 318   case CURAND_STATUS_ARCH_MISMATCH:
 319     return "CURAND_STATUS_ARCH_MISMATCH";
 320   case CURAND_STATUS_INTERNAL_ERROR:
 321     return "CURAND_STATUS_INTERNAL_ERROR";
 322   }
 323   return "Unknown curand status";
 324 }
 325
 326 #endif  // CPU_ONLY
 327
 328 }  // namespace caffe