1 #ifndef CAFFE_SYNCEDMEM_HPP_
2 #define CAFFE_SYNCEDMEM_HPP_
10 #include "caffe/common.hpp"
14 // If CUDA is available and in GPU mode, host memory will be allocated pinned,
15 // using cudaMallocHost. It avoids dynamic pinning for transfers (DMA).
16 // The improvement in performance seems negligible in the single GPU case,
17 // but might be more significant for parallel training. Most importantly,
18 // it improved stability for large models on many GPUs.
// Allocates a host buffer of `size` and returns it through *ptr.
// NOTE(review): this listing omits several original lines (the embedded
// numbering jumps 19 -> 21 -> 22 -> 28 -> 33), so the closing braces, any
// preprocessor guards, and the writes to *use_cuda are not visible here.
// Presumably *use_cuda reports which allocator was used -- confirm against
// the full file.
19 inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) {
// When Caffe is in GPU mode the buffer is obtained from cudaMallocHost
// (pinned host memory); CUDA_CHECK wraps the returned status.
21 if (Caffe::mode() == Caffe::GPU) {
22 CUDA_CHECK(cudaMallocHost(ptr, size));
// mkl_malloc path: a zero-size request is bumped to 1 so the allocator is
// never asked for 0 bytes. The second argument (64) is presumably the
// alignment in bytes -- confirm against the MKL documentation.
28 *ptr = mkl_malloc(size ? size:1, 64);
// CHECK that the resulting pointer is non-null; the streamed message
// includes the requested size.
33 CHECK(*ptr) << "host allocation of size " << size << " failed";
// Frees the host buffer `ptr`.
// NOTE(review): the listing omits the lines between 36 and 39 and everything
// after 39, so the condition guarding the cudaFreeHost call (presumably
// `use_cuda`) and the non-CUDA release path are not visible -- confirm
// against the full file.
36 inline void CaffeFreeHost(void* ptr, bool use_cuda) {
// Releases ptr with cudaFreeHost; CUDA_CHECK wraps the returned status.
39 CUDA_CHECK(cudaFreeHost(ptr));
52 * @brief Manages memory allocation and synchronization between the host (CPU)
55 * TODO(dox): more thorough description.
// Constructor taking a size (presumably in bytes -- TODO confirm); `explicit`
// prevents implicit conversion from size_t.
60 explicit SyncedMemory(size_t size);
// Read-only accessors return const void*; the mutable_* variants below return
// void*. NOTE(review): the definitions are not part of this excerpt.
// Presumably each accessor makes the requested side current before returning
// the pointer -- confirm in the implementation file.
62 const void* cpu_data();
// Supplies an externally provided CPU pointer. Ownership of `data` is not
// visible here -- TODO confirm.
63 void set_cpu_data(void* data);
64 const void* gpu_data();
// GPU counterpart of set_cpu_data; ownership of `data` likewise not visible.
65 void set_gpu_data(void* data);
66 void* mutable_cpu_data();
67 void* mutable_gpu_data();
// Values returned by head(). The names suggest which side holds the most
// recent copy (none, CPU, GPU, or both) -- confirm against the implementation.
68 enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
// Returns head_, the current SyncedHead value. Declared const because it only
// reads a member, so it can also be called through a const SyncedMemory.
69 SyncedHead head() const { return head_; }
// Returns size_. Declared const because it only reads a member, so it can
// also be called through a const SyncedMemory.
70 size_t size() const { return size_; }
// Takes a CUDA stream by const reference. NOTE(review): presumably issues an
// asynchronous host-to-device copy on `stream`; the definition is not part of
// this excerpt -- confirm in the implementation file.
73 void async_gpu_push(const cudaStream_t& stream);
// Boolean flag. Its name mirrors the use_cuda argument of CaffeMallocHost /
// CaffeFreeHost, so it presumably records which allocator produced the CPU
// buffer -- confirm.
86 bool cpu_malloc_use_cuda_;
// Macro defined elsewhere (not in this excerpt); by its name it disables copy
// construction and assignment for SyncedMemory.
90 DISABLE_COPY_AND_ASSIGN(SyncedMemory);
91 }; // class SyncedMemory
95 #endif // CAFFE_SYNCEDMEM_HPP_