1 #include <boost/thread.hpp>
2 #include <glog/logging.h>
8 #include "caffe/common.hpp"
9 #include "caffe/util/rng.hpp"
// Per-thread slot holding that thread's Caffe object.
13 // Make sure each thread can have different values.
14 static boost::thread_specific_ptr<Caffe> thread_instance_;
// Lazy initialization: the first call on a thread allocates a Caffe object and
// stores it in the thread's slot; every call then yields a reference to it.
// NOTE(review): the enclosing function header is not visible in this listing
// (presumably the Caffe::Get() accessor used elsewhere in this file) - confirm.
17 if (!thread_instance_.get()) {
18 thread_instance_.reset(new Caffe());
20 return *(thread_instance_.get());
// Produces a seed value; callers in this file feed it to caffe::rng_t and to
// the curand generator.
// The seed is read from /dev/urandom when possible; otherwise a notice is
// logged and a seed is computed arithmetically from s and pid.
// NOTE(review): the declarations/assignments of seed, pid and s, the fclose
// handling and the return statement are not visible in this listing.
24 int64_t cluster_seedgen(void) {
26 FILE* f = fopen("/dev/urandom", "rb");
// Accept the entropy source only if the file opened and a full
// sizeof(seed) bytes were read.
27 if (f && fread(&seed, 1, sizeof(seed), f) == sizeof(seed)) {
32 LOG(INFO) << "System entropy source not available, "
33 "using fallback algorithm to generate seed instead.";
// Fallback: combine s and pid with fixed multipliers, reduce modulo 104729 and
// take the absolute value so the seed is non-negative.
39 seed = std::abs(((s * 181) * ((pid - 83) * 359)) % 104729);
// Process-wide start-up: parses command-line flags with gflags, initializes
// glog and installs glog's failure signal handler.
// pargc / pargv: pointers to the argument count and argument vector; gflags
// may modify them.
44 void GlobalInit(int* pargc, char*** pargv) {
// Third argument true: gflags removes the flags it parsed from the argument
// list.
46 ::gflags::ParseCommandLineFlags(pargc, pargv, true);
// *(pargv)[0] is the first entry of the argument vector (the program name).
48 ::google::InitGoogleLogging(*(pargv)[0]);
49 // Provide a backtrace on segfault.
50 ::google::InstallFailureSignalHandler();
53 #ifdef CPU_ONLY // CPU-only Caffe.
// Member initializers for the CPU-only build: default-constructed random
// generator, CPU mode, Mali GPU use off, one solver, solver rank 0,
// multiprocess off.
// NOTE(review): the constructor header and body are not visible in this
// listing.
56 : random_generator_(), mode_(Caffe::CPU),use_mali_gpu_(false),
57 solver_count_(1), solver_rank_(0), multiprocess_(false)
// CPU-only build: re-seeds the random generator of the instance returned by
// Get().
// seed: value handed to the new RNG.
63 void Caffe::set_random_seed(const unsigned int seed) {
// Replace the held RNG with a freshly constructed one seeded with `seed`.
65 Get().random_generator_.reset(new RNG(seed));
// CPU-only counterparts of the device-management entry points.
// NOTE(review): only the signatures are visible in this listing; the bodies
// are not shown, so their behavior in a CPU_ONLY build must be confirmed
// against the full file.
68 void Caffe::SetDevice(const int device_id) {
72 void Caffe::DeviceQuery() {
76 bool Caffe::CheckDevice(const int device_id) {
81 int Caffe::FindDevice(const int start_id) {
// Wrapper that owns a caffe::rng_t engine through a shared_ptr.
86 class Caffe::RNG::Generator {
// Default construction seeds the engine with cluster_seedgen().
88 Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
// Seeds the engine with the caller-supplied value.
89 explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
// Returns a raw, non-owning pointer to the engine.
90 caffe::rng_t* rng() { return rng_.get(); }
// The owned engine.
92 shared_ptr<caffe::rng_t> rng_;
// Default constructor: creates a Generator with its default seed.
95 Caffe::RNG::RNG() : generator_(new Generator()) { }
// Seeded constructor: creates a Generator from the given seed.
97 Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }
// Copy-assignment: after the call this RNG refers to the same Generator as
// `other`.
// NOTE(review): the return statement is not visible in this listing.
99 Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
// Smart-pointer assignment, so both RNG objects hold the same Generator.
100 generator_ = other.generator_;
// Returns the underlying caffe::rng_t engine as an untyped pointer; callers
// must cast it back to caffe::rng_t* before use.
104 void* Caffe::RNG::generator() {
105 return static_cast<void*>(generator_->rng());
108 #else // Normal GPU + CPU Caffe.
// Constructor for the GPU-capable build: both library handles start as NULL,
// then creation of a cublas handle and a seeded curand generator is attempted.
// A failure of either is only logged as an error here.
// NOTE(review): the constructor header and part of the initializer list are
// not visible in this listing.
111 : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(),
113 solver_count_(1), solver_rank_(0), multiprocess_(false) {
114 // Try to create a cublas handler, and report an error if failed (but we will
115 // keep the program running as one might just want to run CPU code).
116 if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
117 LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
119 // Try to create a curand handler.
// Both steps must succeed: creating the generator and seeding it with
// cluster_seedgen().
120 if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
121 != CURAND_STATUS_SUCCESS ||
122 curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
123 != CURAND_STATUS_SUCCESS) {
124 LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
// Releases the cublas handle and the curand generator, each only if it is
// non-null.
// NOTE(review): the enclosing function header is not visible in this listing
// (presumably the Caffe destructor) - confirm.
129 if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
130 if (curand_generator_) {
131 CURAND_CHECK(curandDestroyGenerator(curand_generator_));
// GPU-capable build: re-seeds the curand generator (when one exists) and
// replaces the random generator of the instance returned by Get().
// seed: value handed to the new RNG.
135 void Caffe::set_random_seed(const unsigned int seed) {
// Function-local flag so the "not available" notice is emitted only once.
137 static bool g_curand_availability_logged = false;
// Curand path: set the pseudo-random seed, then reset the generator offset
// to 0.
138 if (Get().curand_generator_) {
139 CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
141 CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0));
// No curand generator: log once and skip the curand seeding.
143 if (!g_curand_availability_logged) {
145 "Curand not available. Skipping setting the curand seed.";
146 g_curand_availability_logged = true;
// The RNG object is replaced regardless of curand availability.
150 Get().random_generator_.reset(new RNG(seed));
// Selects CUDA device `device_id` and rebuilds the cublas handle and curand
// generator held by the instance returned by Get(): existing ones are
// destroyed, new ones are created, and the new curand generator is seeded.
// device_id: ordinal of the CUDA device to switch to.
153 void Caffe::SetDevice(const int device_id) {
// Ask the CUDA runtime which device is currently selected.
155 CUDA_CHECK(cudaGetDevice(&current_device));
// Requested device is already the current one.
// NOTE(review): the body of this branch is not visible in this listing -
// presumably an early return; confirm.
156 if (current_device == device_id) {
159 // The call to cudaSetDevice must come before any calls to Get, which
160 // may perform initialization using the GPU.
161 CUDA_CHECK(cudaSetDevice(device_id));
// Drop the handles that were created before the device switch.
162 if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
163 if (Get().curand_generator_) {
164 CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
// Create fresh handles now that the new device is selected.
166 CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
167 CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
168 CURAND_RNG_PSEUDO_DEFAULT));
169 CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
// Writes the properties of the currently selected CUDA device to the INFO log.
// If the current device cannot be obtained, a notice is printed with printf
// instead.
// NOTE(review): the declarations of prop and device, and what follows the
// printf, are not visible in this listing.
173 void Caffe::DeviceQuery() {
176 if (cudaSuccess != cudaGetDevice(&device)) {
177 printf("No cuda device present.\n");
// Fetch the property record for the device, then log its fields one by one.
180 CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
181 LOG(INFO) << "Device id: " << device;
182 LOG(INFO) << "Major revision number: " << prop.major;
183 LOG(INFO) << "Minor revision number: " << prop.minor;
184 LOG(INFO) << "Name: " << prop.name;
185 LOG(INFO) << "Total global memory: " << prop.totalGlobalMem;
186 LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
187 LOG(INFO) << "Total registers per block: " << prop.regsPerBlock;
188 LOG(INFO) << "Warp size: " << prop.warpSize;
189 LOG(INFO) << "Maximum memory pitch: " << prop.memPitch;
190 LOG(INFO) << "Maximum threads per block: " << prop.maxThreadsPerBlock;
191 LOG(INFO) << "Maximum dimension of block: "
192 << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
193 << prop.maxThreadsDim[2];
194 LOG(INFO) << "Maximum dimension of grid: "
195 << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
196 << prop.maxGridSize[2];
197 LOG(INFO) << "Clock rate: " << prop.clockRate;
198 LOG(INFO) << "Total constant memory: " << prop.totalConstMem;
199 LOG(INFO) << "Texture alignment: " << prop.textureAlignment;
200 LOG(INFO) << "Concurrent copy and execution: "
201 << (prop.deviceOverlap ? "Yes" : "No");
202 LOG(INFO) << "Number of multiprocessors: " << prop.multiProcessorCount;
203 LOG(INFO) << "Kernel execution timeout: "
204 << (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
// device_id: ordinal of the GPU to probe.
// NOTE(review): the error-reset call and the return statement are not visible
// in this listing; presumably r is returned - confirm.
208 bool Caffe::CheckDevice(const int device_id) {
209 // This function checks the availability of GPU #device_id.
210 // It attempts to create a context on the device by calling cudaFree(0).
211 // cudaSetDevice() alone is not sufficient to check the availability.
212 // It lazily records device_id, however, does not initialize a
213 // context. So it does not know if the host thread has the permission to use
214 // the device or not.
216 // In a shared environment where the devices are set to EXCLUSIVE_PROCESS
217 // or EXCLUSIVE_THREAD mode, cudaSetDevice() returns cudaSuccess
218 // even if the device is exclusively occupied by another process or thread.
219 // Cuda operations that initialize the context are needed to check
220 // the permission. cudaFree(0) is one of those with no side effect,
221 // except the context initialization.
// r is true only when both cudaSetDevice(device_id) and cudaFree(0) succeed;
// cudaFree(0) is not attempted if cudaSetDevice fails (short-circuit &&).
222 bool r = ((cudaSuccess == cudaSetDevice(device_id)) &&
223 (cudaSuccess == cudaFree(0)));
224 // reset any error that may have occurred.
// start_id: first device ordinal to probe.
// Returns the first ordinal i in [start_id, device count) for which
// CheckDevice(i) is true.
// NOTE(review): the value returned when no device passes the check is not
// visible in this listing.
229 int Caffe::FindDevice(const int start_id) {
230 // This function finds the first available device by checking devices with
231 // ordinal from start_id to the highest available value. In the
232 // EXCLUSIVE_PROCESS or EXCLUSIVE_THREAD mode, if it succeeds, it also
233 // claims the device due to the initialization of the context.
235 CUDA_CHECK(cudaGetDeviceCount(&count));
236 for (int i = start_id; i < count; i++) {
237 if (CheckDevice(i)) return i;
// Wrapper that owns a caffe::rng_t engine through a shared_ptr.
242 class Caffe::RNG::Generator {
// Default construction seeds the engine with cluster_seedgen().
244 Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
// Seeds the engine with the caller-supplied value.
245 explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
// Returns a raw, non-owning pointer to the engine.
246 caffe::rng_t* rng() { return rng_.get(); }
// The owned engine.
248 shared_ptr<caffe::rng_t> rng_;
// Default constructor: creates a Generator with its default seed.
251 Caffe::RNG::RNG() : generator_(new Generator()) { }
// Seeded constructor: creates a Generator from the given seed.
253 Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }
// Copy-assignment: after the call this RNG refers to the same Generator as
// `other`.
// NOTE(review): the return statement is not visible in this listing.
255 Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
// Assign the smart pointer itself so both RNG objects share one ownership
// record. Adopting the raw pointer via reset(other.generator_.get()) would
// create a second, independent owner and the Generator would be deleted twice
// (and self-assignment would leave a dangling pointer).
256 generator_ = other.generator_;
// Returns the underlying caffe::rng_t engine as an untyped pointer; callers
// must cast it back to caffe::rng_t* before use.
260 void* Caffe::RNG::generator() {
261 return static_cast<void*>(generator_->rng());
// Maps a cublasStatus_t value to the name of its enumerator as a string
// literal. Values without a matching case yield "Unknown cublas status".
// The NOT_SUPPORTED and LICENSE_ERROR cases are compiled only when
// CUDA_VERSION is at least 6000 and 6050 respectively.
264 const char* cublasGetErrorString(cublasStatus_t error) {
266 case CUBLAS_STATUS_SUCCESS:
267 return "CUBLAS_STATUS_SUCCESS";
268 case CUBLAS_STATUS_NOT_INITIALIZED:
269 return "CUBLAS_STATUS_NOT_INITIALIZED";
270 case CUBLAS_STATUS_ALLOC_FAILED:
271 return "CUBLAS_STATUS_ALLOC_FAILED";
272 case CUBLAS_STATUS_INVALID_VALUE:
273 return "CUBLAS_STATUS_INVALID_VALUE";
274 case CUBLAS_STATUS_ARCH_MISMATCH:
275 return "CUBLAS_STATUS_ARCH_MISMATCH";
276 case CUBLAS_STATUS_MAPPING_ERROR:
277 return "CUBLAS_STATUS_MAPPING_ERROR";
278 case CUBLAS_STATUS_EXECUTION_FAILED:
279 return "CUBLAS_STATUS_EXECUTION_FAILED";
280 case CUBLAS_STATUS_INTERNAL_ERROR:
281 return "CUBLAS_STATUS_INTERNAL_ERROR";
282 #if CUDA_VERSION >= 6000
283 case CUBLAS_STATUS_NOT_SUPPORTED:
284 return "CUBLAS_STATUS_NOT_SUPPORTED";
286 #if CUDA_VERSION >= 6050
287 case CUBLAS_STATUS_LICENSE_ERROR:
288 return "CUBLAS_STATUS_LICENSE_ERROR";
// Fallback for any status value not handled above.
291 return "Unknown cublas status";
// Maps a curandStatus_t value to the name of its enumerator as a string
// literal. Values without a matching case yield "Unknown curand status".
294 const char* curandGetErrorString(curandStatus_t error) {
296 case CURAND_STATUS_SUCCESS:
297 return "CURAND_STATUS_SUCCESS";
298 case CURAND_STATUS_VERSION_MISMATCH:
299 return "CURAND_STATUS_VERSION_MISMATCH";
300 case CURAND_STATUS_NOT_INITIALIZED:
301 return "CURAND_STATUS_NOT_INITIALIZED";
302 case CURAND_STATUS_ALLOCATION_FAILED:
303 return "CURAND_STATUS_ALLOCATION_FAILED";
304 case CURAND_STATUS_TYPE_ERROR:
305 return "CURAND_STATUS_TYPE_ERROR";
306 case CURAND_STATUS_OUT_OF_RANGE:
307 return "CURAND_STATUS_OUT_OF_RANGE";
308 case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
309 return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
310 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
311 return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
312 case CURAND_STATUS_LAUNCH_FAILURE:
313 return "CURAND_STATUS_LAUNCH_FAILURE";
314 case CURAND_STATUS_PREEXISTING_FAILURE:
315 return "CURAND_STATUS_PREEXISTING_FAILURE";
316 case CURAND_STATUS_INITIALIZATION_FAILED:
317 return "CURAND_STATUS_INITIALIZATION_FAILED";
318 case CURAND_STATUS_ARCH_MISMATCH:
319 return "CURAND_STATUS_ARCH_MISMATCH";
320 case CURAND_STATUS_INTERNAL_ERROR:
321 return "CURAND_STATUS_INTERNAL_ERROR";
// Fallback for any status value not handled above.
323 return "Unknown curand status";