include/caffe/syncedmem.hpp

   1 #ifndef CAFFE_SYNCEDMEM_HPP_
   2 #define CAFFE_SYNCEDMEM_HPP_
   3
   4 #include <cstdlib>
   5
   6 #include "caffe/common.hpp"
   7 #include "caffe/util/math_functions.hpp"
   8
   9 namespace caffe {
  10
  // Theoretically, CaffeMallocHost and CaffeFreeHost should simply call the
  // cudaMallocHost and cudaFreeHost functions in order to create pinned
  // memory. However, those functions rely on the existence of a CUDA GPU (I
  // don't know why that is a must, since allocating memory should not require
  // access to GPU resources, but it just produces an error as of CUDA 5.0) and
  // will cause problems when running on a machine without a GPU. Thus, we
  // simply define these two functions for safety and to allow for a future
  // change if the problem of calling CUDA functions disappears in a later
  // version.
  19 //
  // In practice, although we are creating unpinned memory here, as long as we
  // are constantly accessing it, the memory pages almost always stay in
  // physical memory (assuming enough memory is installed), so this does not
  // seem to create a memory bottleneck.
  24
  25 inline void CaffeMallocHost(void** ptr, size_t size) {
  26   *ptr = malloc(size);
  27   CHECK(*ptr) << "host allocation of size " << size << " failed";
  28 }
  29
  /**
   * @brief Releases a host buffer by handing it to free.
   *
   * @param ptr Buffer to release; a null pointer is accepted and ignored,
   *            exactly as free does.
   */
  inline void CaffeFreeHost(void* ptr) {
    std::free(ptr);
  }
  33
  34
  35 /**
  36  * @brief Manages memory allocation and synchronization between the host (CPU)
  37  *        and device (GPU).
  38  *
  39  * TODO(dox): more thorough description.
  40  */
  41 class SyncedMemory {
  42  public:
  43   SyncedMemory()
  44       : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED),
  45         own_cpu_data_(false) {}
  46   explicit SyncedMemory(size_t size)
  47       : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED),
  48         own_cpu_data_(false) {}
  49   ~SyncedMemory();
  50   const void* cpu_data();
  51   void set_cpu_data(void* data);
  52   const void* gpu_data();
  53   void* mutable_cpu_data();
  54   void* mutable_gpu_data();
  55   enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
  56   SyncedHead head() { return head_; }
  57   size_t size() { return size_; }
  58
  59 #ifndef CPU_ONLY
  60   void async_gpu_push(const cudaStream_t& stream);
  61 #endif
  62
  63  private:
  64   void to_cpu();
  65   void to_gpu();
  66   void* cpu_ptr_;
  67   void* gpu_ptr_;
  68   size_t size_;
  69   SyncedHead head_;
  70   bool own_cpu_data_;
  71
  72   DISABLE_COPY_AND_ASSIGN(SyncedMemory);
  73 };  // class SyncedMemory
  74
  75 }  // namespace caffe
  76
  77 #endif  // CAFFE_SYNCEDMEM_HPP_