1 #ifndef CAFFE_PARALLEL_HPP_
2 #define CAFFE_PARALLEL_HPP_
6 #include <boost/thread.hpp>
11 #include "caffe/blob.hpp"
12 #include "caffe/common.hpp"
13 #include "caffe/internal_thread.hpp"
14 #include "caffe/layer.hpp"
15 #include "caffe/proto/caffe.pb.h"
16 #include "caffe/solver.hpp"
17 #include "caffe/syncedmem.hpp"
18 #include "caffe/util/blocking_queue.hpp"
19 #include "caffe/util/nccl.hpp"
23 // Represents a net's parameters. Once a net is created, its parameter buffers can
24 // be replaced by ones from Params, to allow parallelization. Params ensures
25 // parameters are allocated in one consecutive array.
26 template<typename Dtype>
29 explicit Params(shared_ptr<Solver<Dtype> > root_solver);
33 inline size_t size() const {
36 inline Dtype* data() const {
39 inline Dtype* diff() const {
44 const size_t size_; // Size of buffers
45 Dtype* data_; // Network parameters
46 Dtype* diff_; // Gradient
48 DISABLE_COPY_AND_ASSIGN(Params);
51 // Params stored in GPU memory.
52 template<typename Dtype>
53 class GPUParams : public Params<Dtype> {
55 GPUParams(shared_ptr<Solver<Dtype> > root_solver, int device);
58 void Configure(Solver<Dtype>* solver) const;
61 using Params<Dtype>::size_;
62 using Params<Dtype>::data_;
63 using Params<Dtype>::diff_;
66 template<typename Dtype>
67 class NCCL : public GPUParams<Dtype>,
68 public Solver<Dtype>::Callback,
69 public Net<Dtype>::Callback {
72 * Single process version.
74 explicit NCCL(shared_ptr<Solver<Dtype> > solver);
76 * In multi-process settings, first create a NCCL id (new_uid), then
77 * pass it to each process to create connected instances.
79 NCCL(shared_ptr<Solver<Dtype> > solver, const string& uid);
82 boost::barrier* barrier();
83 void set_barrier(boost::barrier* value);
86 * In single process settings, create instances without uids and
87 * call this to connect them.
89 static void InitSingleProcess(vector<NCCL<Dtype>*>* nccls);
91 static string new_uid();
94 * Broadcast weights from rank 0 to other solvers.
99 * Single process multi-GPU.
101 void Run(const vector<int>& gpus, const char* restore);
106 void run(int layer); // Net callback
107 void on_gradients_ready();
110 cudaStream_t stream_;
112 shared_ptr<Solver<Dtype> > solver_;
113 // Should not be necessary, https://github.com/NVIDIA/nccl/issues/37
114 boost::barrier* barrier_;
115 using Params<Dtype>::size_;
116 using Params<Dtype>::data_;
117 using Params<Dtype>::diff_;