Caffe::Caffe()
: mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
curand_generator_(NULL), vsl_stream_(NULL) {
+ // Try to create a cublas handle, and report an error if this fails (but we
+ // will keep the program running, as one might just want to run CPU code).
if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
}
+ // Try to create a curand generator.
if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
!= CURAND_STATUS_SUCCESS ||
curandSetPseudoRandomGeneratorSeed(curand_generator_, 1701ULL)
!= CURAND_STATUS_SUCCESS) {
LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
}
+ // Try to create a vsl stream. This should almost always work, but we will
+ // check it anyway.
if (vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701) != VSL_STATUS_OK) {
LOG(ERROR) << "Cannot create vsl stream. VSL random number generator "
<< "won't be available.";
void Caffe::set_random_seed(const unsigned int seed) {
// Curand seed
// Yangqing's note: simply setting the generator seed does not seem to
- // work on the tesla K20s, so I wrote the ugly reset thing below. It is not
- // tested yet and I'll wait til Jeff finishes training.
+ // work on the tesla K20s, so I wrote the ugly reset thing below.
if (Get().curand_generator_) {
CURAND_CHECK(curandDestroyGenerator(curand_generator()));
CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
#include <glog/logging.h>
#include <mkl_vsl.h>
+// Various checks for different function calls.
#define CUDA_CHECK(condition) CHECK_EQ((condition), cudaSuccess)
#define CUBLAS_CHECK(condition) CHECK_EQ((condition), CUBLAS_STATUS_SUCCESS)
#define CURAND_CHECK(condition) CHECK_EQ((condition), CURAND_STATUS_SUCCESS)
#define VSL_CHECK(condition) CHECK_EQ((condition), VSL_STATUS_OK)
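+// Example usage (a sketch; the cudaMalloc call is only an illustration):
+//   CUDA_CHECK(cudaMalloc(&data, size));
+// aborts with a glog fatal check unless the call returns cudaSuccess.
+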
+// After a kernel is executed, this will check the error and, if there is one,
+// exit loudly.
#define CUDA_POST_KERNEL_CHECK \
if (cudaSuccess != cudaPeekAtLastError()) \
LOG(FATAL) << "Cuda kernel failed. Error: " \
<< cudaGetErrorString(cudaPeekAtLastError())
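+// Example usage (a sketch; my_kernel is a placeholder name):
+//   my_kernel<<<blocks, threads>>>(args);
+//   CUDA_POST_KERNEL_CHECK;
+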
+// Disable the copy constructor and assignment operator for a class.
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:\
classname(const classname&);\
classname& operator=(const classname&)
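+// Example usage (a sketch; Foo is a placeholder class). Since the macro ends
+// in a private section, it goes last in the class body:
+//   class Foo {
+//    public:
+//     Foo();
+//    DISABLE_COPY_AND_ASSIGN(Foo);
+//   };
+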
+// Instantiate a class with float and double specializations.
#define INSTANTIATE_CLASS(classname) \
template class classname<float>; \
template class classname<double>
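+// For example (a sketch), a layer's .cpp file would end with
+//   INSTANTIATE_CLASS(ConvolutionLayer);
+// which emits ConvolutionLayer<float> and ConvolutionLayer<double>.
+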
+// A simple macro to mark code that is not implemented, so that when it is
+// executed we will see a fatal log.
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"
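+// For example (a sketch; SomeLayer is a placeholder), an unwritten code path
+// can be stubbed as
+//   void SomeLayer::Backward_gpu() { NOT_IMPLEMENTED; }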
namespace caffe {
+
// We will use the boost shared_ptr instead of the new C++11 one, mainly
// because cuda does not (at least for now) work well with C++11 features.
using boost::shared_ptr;
+
// For backward compatibility we will just use 512 threads per block
const int CAFFE_CUDA_NUM_THREADS = 512;
+
inline int CAFFE_GET_BLOCKS(const int N) {
return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
}
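+// Example launch using the two helpers above (a sketch; my_kernel is a
+// placeholder name):
+//   my_kernel<<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n, data);
+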
// A singleton class to hold common caffe stuff, such as the handlers that
-// caffe is going to use for cublas.
+// caffe is going to use for cublas, curand, etc.
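+// (Sketch, assuming the usual static accessors: callers go through the
+// singleton, e.g. Caffe::set_mode(Caffe::GPU) and Caffe::mode().)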
class Caffe {
private:
// The private constructor to avoid duplicate instantiation.
blobs_[net_input_blob_indices_[i]]->CopyFrom(*bottom[i]);
}
for (int i = 0; i < layers_.size(); ++i) {
- //LOG(ERROR) << "Forwarding " << layer_names_[i];
+ // LOG(ERROR) << "Forwarding " << layer_names_[i];
layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
}
return net_output_blobs_;
#include <vector>
#include "caffe/blob.hpp"
-#include "caffe/layer.hpp"
#include "caffe/common.hpp"
+#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
using std::map;
namespace caffe {
+
template <typename Dtype>
class Net {
public:
inline vector<vector<Blob<Dtype>*> >& bottom_vecs() { return bottom_vecs_; }
inline vector<vector<Blob<Dtype>*> >& top_vecs() { return top_vecs_; }
// Returns the parameters
- vector<shared_ptr<Blob<Dtype> > >& params() { return params_; };
+ vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
// Updates the network
void Update();
// Copyright Yangqing Jia 2013
#include <algorithm>
-#include <fstream>
#include <string>
+#include <vector>
-#include "caffe/proto/caffe.pb.h"
#include "caffe/net.hpp"
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/optimization/solver.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
-#include "caffe/optimization/solver.hpp"
using std::max;
using std::min;
-using std::stringstream;
-using std::ofstream;
namespace caffe {
+
template <typename Dtype>
void Solver<Dtype>::Solve(Net<Dtype>* net) {
net_ = net;
LOG(INFO) << "Optimization Done.";
}
+
template <typename Dtype>
void Solver<Dtype>::Snapshot(bool is_final) {
NetParameter net_param;
WriteProtoToBinaryFile(net_param, filename.c_str());
}
+
template <typename Dtype>
Dtype SGDSolver<Dtype>::GetLearningRate() {
Dtype rate;
return rate;
}
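+// (The elided body above picks the rate according to the learning rate
+// policy; as a sketch, a "step" policy would compute
+//   rate = base_lr * pow(gamma, floor(iter / stepsize));
+// with base_lr, gamma, and stepsize taken from the solver parameters.)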
+
template <typename Dtype>
void SGDSolver<Dtype>::PreSolve() {
// First of all, see if we need to initialize the history
}
}
+
template <typename Dtype>
void SGDSolver<Dtype>::ComputeUpdateValue() {
vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
}
}
+
INSTANTIATE_CLASS(Solver);
INSTANTIATE_CLASS(SGDSolver);
-} // namespace caffe
\ No newline at end of file
+} // namespace caffe
+// Copyright Yangqing Jia 2013
+
#ifndef CAFFE_OPTIMIZATION_SOLVER_HPP_
#define CAFFE_OPTIMIZATION_SOLVER_HPP_
+#include <vector>
+
namespace caffe {
template <typename Dtype>
: param_(param) {}
// The main entry of the solver function.
void Solve(Net<Dtype>* net);
+ virtual ~Solver() {}
protected:
// PreSolve is run before any solving iteration starts, allowing one to
// put up some scaffolding.
- virtual void PreSolve() {};
+ virtual void PreSolve() {}
// Get the update value for the current iteration.
virtual void ComputeUpdateValue() = 0;
void Snapshot(bool is_final = false);
DISABLE_COPY_AND_ASSIGN(Solver);
};
+
template <typename Dtype>
class SGDSolver : public Solver<Dtype> {
public:
} // namespace caffe
-#endif // CAFFE_OPTIMIZATION_SOLVER_HPP_
\ No newline at end of file
+#endif // CAFFE_OPTIMIZATION_SOLVER_HPP_
}
void WriteProtoToBinaryFile(const Message& proto, const char* filename);
-inline void WriteProtoToBinaryFile(const Message& proto, const string& filename) {
+inline void WriteProtoToBinaryFile(
+ const Message& proto, const string& filename) {
WriteProtoToBinaryFile(proto, filename.c_str());
}
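+// Usage sketch (the filename here is arbitrary):
+//   NetParameter net_param;
+//   WriteProtoToBinaryFile(net_param, "net.binaryproto");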
namespace caffe {
+
// The neuron layer is a specific type of layer that just operates on single
// elements.
template <typename Dtype>
shared_ptr<SyncedMemory> bias_multiplier_;
};
+
template <typename Dtype>
class PaddingLayer : public Layer<Dtype> {
public:
int WIDTH_OUT_;
};
+
template <typename Dtype>
class LRNLayer : public Layer<Dtype> {
public:
int width_;
};
+
template <typename Dtype>
class Im2colLayer : public Layer<Dtype> {
public:
int WIDTH_;
};
+
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
public:
int POOLED_WIDTH_;
};
+
template <typename Dtype>
class ConvolutionLayer : public Layer<Dtype> {
public:
int N_;
};
+
// This function is used to create a pthread that prefetches the data.
template <typename Dtype>
void* DataLayerPrefetch(void* layer_pointer);
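+// (Sketch of the intended use, with a hypothetical thread_ member: the layer
+// spawns the prefetcher via
+//   pthread_create(&thread_, NULL, DataLayerPrefetch<Dtype>,
+//                  reinterpret_cast<void*>(this));
+// and joins it before reusing the prefetch buffers.)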
template <typename Dtype>
class DataLayer : public Layer<Dtype> {
// The function used to perform prefetching.
- friend void* DataLayerPrefetch<Dtype>(void*);
+ friend void* DataLayerPrefetch<Dtype>(void* layer_pointer);
public:
explicit DataLayer(const LayerParameter& param)
Blob<Dtype> difference_;
};
+
template <typename Dtype>
class AccuracyLayer : public Layer<Dtype> {
public: