include/caffe/syncedmem.hpp

   1 #ifndef CAFFE_SYNCEDMEM_HPP_
   2 #define CAFFE_SYNCEDMEM_HPP_
   3
   4 #include <cstdlib>
   5
   6 #ifdef USE_MKL
   7   #include "mkl.h"
   8 #endif
   9
  10 #include "caffe/common.hpp"
  11
  12 namespace caffe {
  13
  14 // If CUDA is available and in GPU mode, host memory will be allocated pinned,
  15 // using cudaMallocHost. It avoids dynamic pinning for transfers (DMA).
  16 // The improvement in performance seems negligible in the single GPU case,
  17 // but might be more significant for parallel training. Most importantly,
  18 // it improved stability for large models on many GPUs.
  19 inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) {
  20 #ifndef CPU_ONLY
  21   if (Caffe::mode() == Caffe::GPU) {
  22     CUDA_CHECK(cudaMallocHost(ptr, size));
  23     *use_cuda = true;
  24     return;
  25   }
  26 #endif
  27 #ifdef USE_MKL
  28   *ptr = mkl_malloc(size ? size:1, 64);
  29 #else
  30   *ptr = malloc(size);
  31 #endif
  32   *use_cuda = false;
  33   CHECK(*ptr) << "host allocation of size " << size << " failed";
  34 }
  35
  36 inline void CaffeFreeHost(void* ptr, bool use_cuda) {
  37 #ifndef CPU_ONLY
  38   if (use_cuda) {
  39     CUDA_CHECK(cudaFreeHost(ptr));
  40     return;
  41   }
  42 #endif
  43 #ifdef USE_MKL
  44   mkl_free(ptr);
  45 #else
  46   free(ptr);
  47 #endif
  48 }
  49
  50
  51 /**
  52  * @brief Manages memory allocation and synchronization between the host (CPU)
  53  *        and device (GPU).
  54  *
  55  * TODO(dox): more thorough description.
  56  */
  57 class SyncedMemory {
  58  public:
  59   SyncedMemory();
  60   explicit SyncedMemory(size_t size);
  61   ~SyncedMemory();
  62   const void* cpu_data();
  63   void set_cpu_data(void* data);
  64   const void* gpu_data();
  65   void set_gpu_data(void* data);
  66   void* mutable_cpu_data();
  67   void* mutable_gpu_data();
  68   enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
  69   SyncedHead head() { return head_; }
  70   size_t size() { return size_; }
  71
  72 #ifndef CPU_ONLY
  73   void async_gpu_push(const cudaStream_t& stream);
  74 #endif
  75
  76  private:
  77   void check_device();
  78
  79   void to_cpu();
  80   void to_gpu();
  81   void* cpu_ptr_;
  82   void* gpu_ptr_;
  83   size_t size_;
  84   SyncedHead head_;
  85   bool own_cpu_data_;
  86   bool cpu_malloc_use_cuda_;
  87   bool own_gpu_data_;
  88   int device_;
  89
  90   DISABLE_COPY_AND_ASSIGN(SyncedMemory);
  91 };  // class SyncedMemory
  92
  93 }  // namespace caffe
  94
  95 #endif  // CAFFE_SYNCEDMEM_HPP_