#ifndef CAFFE_NET_HPP_
#define CAFFE_NET_HPP_

#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
/**
 * @brief Connects Layer%s together into a directed acyclic graph (DAG)
 *        specified by a NetParameter.
 *
 * TODO(dox): more thorough description.
 */
template <typename Dtype>
class Net {
 public:
  explicit Net(const NetParameter& param);
  explicit Net(const string& param_file, Phase phase,
      const int level = 0, const vector<string>* stages = NULL);
  virtual ~Net() {}
  /// @brief Initialize a network with a NetParameter.
  void Init(const NetParameter& param);
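  // Usage sketch (illustrative, not part of the upstream header): constructing
  // a net directly from a prototxt file. The path below is a hypothetical
  // placeholder.
  //
  //   caffe::Net<float> net("models/deploy.prototxt", caffe::TEST);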
  /**
   * @brief Run Forward and return the result.
   */
  const vector<Blob<Dtype>*>& Forward(Dtype* loss = NULL);
  /// @brief DEPRECATED; use Forward() instead.
  const vector<Blob<Dtype>*>& ForwardPrefilled(Dtype* loss = NULL) {
    LOG_EVERY_N(WARNING, 1000) << "DEPRECATED: ForwardPrefilled() "
        << "will be removed in a future version. Use Forward().";
    return Forward(loss);
  }
  /**
   * The From and To variants of Forward and Backward operate on the
   * (topological) ordering by which the net is specified. For general DAG
   * networks, note that (1) computing from one layer to another might entail
   * extra computation on unrelated branches, and (2) computation starting in
   * the middle may be incorrect if all of the layers of a fan-in are not
   * included.
   */
  Dtype ForwardFromTo(int start, int end);
  Dtype ForwardFrom(int start);
  Dtype ForwardTo(int end);
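  // Usage sketch (illustrative): run only layers 0..k inclusive, e.g. to
  // inspect an intermediate blob without executing the whole net. `k` is a
  // hypothetical layer index.
  //
  //   net.ForwardFromTo(0, k);  // equivalent to net.ForwardTo(k)
  //   net.ForwardFromTo(k + 1, net.layers().size() - 1);  // resume the pass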
  /// @brief DEPRECATED; set input blobs then use Forward() instead.
  const vector<Blob<Dtype>*>& Forward(const vector<Blob<Dtype>* > & bottom,
      Dtype* loss = NULL);
  /**
   * @brief Zeroes out the diffs of all net parameters.
   *        Should be run before Backward.
   */
  void ClearParamDiffs();
  /**
   * The network backward should take no input and output, since it solely
   * computes the gradient w.r.t. the parameters, and the data has already
   * been provided during the forward pass.
   */
  void Backward();
  void BackwardFromTo(int start, int end);
  void BackwardFrom(int start);
  void BackwardTo(int end);
  /**
   * @brief Reshape all layers from bottom to top.
   *
   * This is useful to propagate changes to layer sizes without running
   * a forward pass, e.g. to compute output feature size.
   */
  void Reshape();
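  // Usage sketch (illustrative): change the input batch size without running
  // a forward pass. The shape below is a hypothetical placeholder.
  //
  //   net.input_blobs()[0]->Reshape(1, 3, 227, 227);
  //   net.Reshape();  // propagate the new shape through every layer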
  Dtype ForwardBackward() {
    Dtype loss;
    Forward(&loss);
    Backward();
    return loss;
  }
  /// @brief Updates the network weights based on the diff values computed.
  void Update();
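  // Usage sketch (illustrative): one manually driven training step. A real
  // Solver additionally scales the diffs by the learning rate before Update().
  //
  //   net.ClearParamDiffs();  // diffs accumulate, so zero them first
  //   float loss = net.ForwardBackward();
  //   net.Update();  // subtracts each parameter's diff from its data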
  /**
   * @brief Shares weight data of owner blobs with shared blobs.
   *
   * Note: this is called by Net::Init, and thus should normally not be
   * called manually.
   */
  void ShareWeights();
  /**
   * @brief For an already initialized net, implicitly copies (i.e., using no
   *        additional memory) the pre-trained layers from another Net.
   */
  void ShareTrainedLayersWith(const Net* other);
  // For an already initialized net, CopyTrainedLayersFrom() copies the
  // already trained layers from another net parameter instance.
  /**
   * @brief For an already initialized net, copies the pre-trained layers from
   *        another Net.
   */
  void CopyTrainedLayersFrom(const NetParameter& param);
  void CopyTrainedLayersFrom(const string trained_filename);
  void CopyTrainedLayersFromBinaryProto(const string trained_filename);
  void CopyTrainedLayersFromHDF5(const string trained_filename);
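  // Usage sketch (illustrative): initialize from pre-trained weights, as in
  // fine-tuning. The .caffemodel path is a hypothetical placeholder; source
  // layers are matched to target layers by name, and layers without a match
  // keep their current initialization.
  //
  //   net.CopyTrainedLayersFrom("models/pretrained.caffemodel");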
  /// @brief Writes the net to a proto.
  void ToProto(NetParameter* param, bool write_diff = false) const;
  /// @brief Writes the net to an HDF5 file.
  void ToHDF5(const string& filename, bool write_diff = false) const;
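  // Usage sketch (illustrative): snapshot the current weights into a
  // NetParameter, optionally including the diffs.
  //
  //   caffe::NetParameter snapshot;
  //   net.ToProto(&snapshot, /*write_diff=*/false);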
  /// @brief returns the network name.
  inline const string& name() const { return name_; }
  /// @brief returns the layer names
  inline const vector<string>& layer_names() const { return layer_names_; }
  /// @brief returns the blob names
  inline const vector<string>& blob_names() const { return blob_names_; }
  /// @brief returns the blobs
  inline const vector<shared_ptr<Blob<Dtype> > >& blobs() const {
    return blobs_;
  }
  /// @brief returns the layers
  inline const vector<shared_ptr<Layer<Dtype> > >& layers() const {
    return layers_;
  }
  /// @brief returns the phase: TRAIN or TEST
  inline Phase phase() const { return phase_; }
  /**
   * @brief returns the bottom vecs for each layer -- usually you won't
   *        need this unless you do per-layer checks such as gradients.
   */
  inline const vector<vector<Blob<Dtype>*> >& bottom_vecs() const {
    return bottom_vecs_;
  }
  /**
   * @brief returns the top vecs for each layer -- usually you won't
   *        need this unless you do per-layer checks such as gradients.
   */
  inline const vector<vector<Blob<Dtype>*> >& top_vecs() const {
    return top_vecs_;
  }
  /// @brief returns the ids of the top blobs of layer i
  inline const vector<int> & top_ids(int i) const {
    CHECK_GE(i, 0) << "Invalid layer id";
    CHECK_LT(i, top_id_vecs_.size()) << "Invalid layer id";
    return top_id_vecs_[i];
  }
  /// @brief returns the ids of the bottom blobs of layer i
  inline const vector<int> & bottom_ids(int i) const {
    CHECK_GE(i, 0) << "Invalid layer id";
    CHECK_LT(i, bottom_id_vecs_.size()) << "Invalid layer id";
    return bottom_id_vecs_[i];
  }
  inline const vector<vector<bool> >& bottom_need_backward() const {
    return bottom_need_backward_;
  }
  inline const vector<Dtype>& blob_loss_weights() const {
    return blob_loss_weights_;
  }
  inline const vector<bool>& layer_need_backward() const {
    return layer_need_backward_;
  }
  /// @brief returns the parameters
  inline const vector<shared_ptr<Blob<Dtype> > >& params() const {
    return params_;
  }
  inline const vector<Blob<Dtype>*>& learnable_params() const {
    return learnable_params_;
  }
  /// @brief returns the learnable parameter learning rate multipliers
  inline const vector<float>& params_lr() const { return params_lr_; }
  inline const vector<bool>& has_params_lr() const { return has_params_lr_; }
  /// @brief returns the learnable parameter decay multipliers
  inline const vector<float>& params_weight_decay() const {
    return params_weight_decay_;
  }
  inline const vector<bool>& has_params_decay() const {
    return has_params_decay_;
  }
  const map<string, int>& param_names_index() const {
    return param_names_index_;
  }
  inline const vector<int>& param_owners() const { return param_owners_; }
  inline const vector<string>& param_display_names() const {
    return param_display_names_;
  }
  /// @brief Input and output blob numbers
  inline int num_inputs() const { return net_input_blobs_.size(); }
  inline int num_outputs() const { return net_output_blobs_.size(); }
  inline const vector<Blob<Dtype>*>& input_blobs() const {
    return net_input_blobs_;
  }
  inline const vector<Blob<Dtype>*>& output_blobs() const {
    return net_output_blobs_;
  }
  inline const vector<int>& input_blob_indices() const {
    return net_input_blob_indices_;
  }
  inline const vector<int>& output_blob_indices() const {
    return net_output_blob_indices_;
  }
  bool has_blob(const string& blob_name) const;
  const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name) const;
  bool has_layer(const string& layer_name) const;
  const shared_ptr<Layer<Dtype> > layer_by_name(const string& layer_name) const;
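  // Usage sketch (illustrative): read an intermediate activation by name
  // after a forward pass. "fc7" is a hypothetical blob name; check with
  // has_blob() first, since looking up an unknown name is a fatal error.
  //
  //   if (net.has_blob("fc7")) {
  //     const boost::shared_ptr<caffe::Blob<float> > feat =
  //         net.blob_by_name("fc7");
  //     const float* data = feat->cpu_data();  // feat->count() elements
  //   }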
  void set_debug_info(const bool value) { debug_info_ = value; }
  // Helpers for Init.
  /**
   * @brief Remove layers that the user specified should be excluded given
   *        the current phase, level, and stage.
   */
  static void FilterNet(const NetParameter& param,
      NetParameter* param_filtered);
  /// @brief return whether NetState state meets NetStateRule rule
  static bool StateMeetsRule(const NetState& state, const NetStateRule& rule,
      const string& layer_name);
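  // Usage sketch (illustrative): drop layers whose include/exclude rules do
  // not match the current phase/level/stages before building the net. `param`
  // is a NetParameter loaded elsewhere (hypothetical).
  //
  //   caffe::NetParameter filtered;
  //   caffe::Net<float>::FilterNet(param, &filtered);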
  // Invoked at specific points during an iteration
  class Callback {
   protected:
    virtual void run(int layer) = 0;

    template <typename T>
    friend class Net;
  };
  const vector<Callback*>& before_forward() const { return before_forward_; }
  void add_before_forward(Callback* value) {
    before_forward_.push_back(value);
  }
  const vector<Callback*>& after_forward() const { return after_forward_; }
  void add_after_forward(Callback* value) {
    after_forward_.push_back(value);
  }
  const vector<Callback*>& before_backward() const { return before_backward_; }
  void add_before_backward(Callback* value) {
    before_backward_.push_back(value);
  }
  const vector<Callback*>& after_backward() const { return after_backward_; }
  void add_after_backward(Callback* value) {
    after_backward_.push_back(value);
  }
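  // Usage sketch (illustrative): a callback that logs each layer index as it
  // is reached. `LayerLogger` is a hypothetical name; the net calls run(layer)
  // at the registered point of each iteration.
  //
  //   class LayerLogger : public caffe::Net<float>::Callback {
  //    protected:
  //     void run(int layer) { LOG(INFO) << "running layer " << layer; }
  //   };
  //   LayerLogger logger;
  //   net.add_before_forward(&logger);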
 protected:
  // Helpers for Init.
  /// @brief Append a new top blob to the net.
  void AppendTop(const NetParameter& param, const int layer_id,
                 const int top_id, set<string>* available_blobs,
                 map<string, int>* blob_name_to_idx);
  /// @brief Append a new bottom blob to the net.
  int AppendBottom(const NetParameter& param, const int layer_id,
                   const int bottom_id, set<string>* available_blobs,
                   map<string, int>* blob_name_to_idx);
  /// @brief Append a new parameter blob to the net.
  void AppendParam(const NetParameter& param, const int layer_id,
                   const int param_id);
  /// @brief Helper for displaying debug info in Forward.
  void ForwardDebugInfo(const int layer_id);
  /// @brief Helper for displaying debug info in Backward.
  void BackwardDebugInfo(const int layer_id);
  /// @brief Helper for displaying debug info in Update.
  void UpdateDebugInfo(const int param_id);
  /// @brief The network name
  string name_;
  /// @brief The phase: TRAIN or TEST
  Phase phase_;
  /// @brief Individual layers in the net
  vector<shared_ptr<Layer<Dtype> > > layers_;
  vector<string> layer_names_;
  map<string, int> layer_names_index_;
  vector<bool> layer_need_backward_;
  /// @brief the blobs storing intermediate results between the layers.
  vector<shared_ptr<Blob<Dtype> > > blobs_;
  vector<string> blob_names_;
  map<string, int> blob_names_index_;
  vector<bool> blob_need_backward_;
  /// bottom_vecs stores the vectors containing the input for each layer.
  /// They don't actually host the blobs (blobs_ does), so we simply store
  /// pointers.
  vector<vector<Blob<Dtype>*> > bottom_vecs_;
  vector<vector<int> > bottom_id_vecs_;
  vector<vector<bool> > bottom_need_backward_;
  /// top_vecs stores the vectors containing the output for each layer
  vector<vector<Blob<Dtype>*> > top_vecs_;
  vector<vector<int> > top_id_vecs_;
  /// The weight in the loss (or objective) function of each net blob,
  /// indexed by blob_id.
  vector<Dtype> blob_loss_weights_;
  vector<vector<int> > param_id_vecs_;
  vector<int> param_owners_;
  vector<string> param_display_names_;
  vector<pair<int, int> > param_layer_indices_;
  map<string, int> param_names_index_;
  /// blob indices for the input and the output of the net
  vector<int> net_input_blob_indices_;
  vector<int> net_output_blob_indices_;
  vector<Blob<Dtype>*> net_input_blobs_;
  vector<Blob<Dtype>*> net_output_blobs_;
  /// The parameters in the network.
  vector<shared_ptr<Blob<Dtype> > > params_;
  vector<Blob<Dtype>*> learnable_params_;
  /**
   * The mapping from params_ -> learnable_params_: we have
   * learnable_param_ids_.size() == params_.size(),
   * and learnable_params_[learnable_param_ids_[i]] == params_[i].get()
   * if and only if params_[i] is an "owner"; otherwise, params_[i] is a sharer
   * and learnable_params_[learnable_param_ids_[i]] gives its owner.
   */
  vector<int> learnable_param_ids_;
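  // Illustration (hypothetical example, not from the source): if two layers
  // share a single parameter named "w", params_ has one entry per slot but
  // only one learnable owner:
  //
  //   params_.size() == 2, learnable_params_.size() == 1
  //   learnable_param_ids_ == {0, 0}  // both slots map to the same owner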
  /// the learning rate multipliers for learnable_params_
  vector<float> params_lr_;
  vector<bool> has_params_lr_;
  /// the weight decay multipliers for learnable_params_
  vector<float> params_weight_decay_;
  vector<bool> has_params_decay_;
  /// The bytes of memory used by this net
  size_t memory_used_;
  /// Whether to compute and display debug info for the net.
  bool debug_info_;
  // Callbacks
  vector<Callback*> before_forward_;
  vector<Callback*> after_forward_;
  vector<Callback*> before_backward_;
  vector<Callback*> after_backward_;
DISABLE_COPY_AND_ASSIGN(Net);
};

}  // namespace caffe

#endif  // CAFFE_NET_HPP_