# using program.NAME, program.C_SRCS, etc. There are no structs in Make, so we use this convention
# to keep track of attributes that all belong to the same target or program.
#
-PROJECT := caffeine
+PROJECT := caffe
NAME := lib$(PROJECT).so
TEST_NAME := test_$(PROJECT)
-CXX_SRCS := $(shell find caffeine ! -name "test_*.cpp" -name "*.cpp")
-CU_SRCS := $(shell find caffeine -name "*.cu")
-TEST_SRCS := $(shell find caffeine -name "test_*.cpp")
+CXX_SRCS := $(shell find caffe ! -name "test_*.cpp" -name "*.cpp")
+CU_SRCS := $(shell find caffe -name "*.cu")
+TEST_SRCS := $(shell find caffe -name "test_*.cpp")
GTEST_SRC := gtest/gtest-all.cpp
-PROTO_SRCS := $(wildcard caffeine/proto/*.proto)
+PROTO_SRCS := $(wildcard caffe/proto/*.proto)
PROTO_GEN_HEADER := ${PROTO_SRCS:.proto=.pb.h}
PROTO_GEN_CC := ${PROTO_SRCS:.proto=.pb.cc}
PROTO_GEN_PY := ${PROTO_SRCS:.proto=_pb2.py}
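# As an illustration of the substitution references above: with the single
# proto file caffe/proto/layer_param.proto (the one the sources below
# include), these expand to
#   PROTO_GEN_HEADER = caffe/proto/layer_param.pb.h
#   PROTO_GEN_CC     = caffe/proto/layer_param.pb.cc
#   PROTO_GEN_PY     = caffe/proto/layer_param_pb2.py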
all: $(NAME)
linecount: clean
- cloc --read-lang-def=caffeine.cloc caffeine/
+ cloc --read-lang-def=caffe.cloc caffe/
test: $(OBJS) $(GTEST_OBJ) $(TEST_BINS)
#include <cublas_v2.h>
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/syncedmem.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/syncedmem.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
INSTANTIATE_CLASS(Blob);
-} // namespace caffeine
+} // namespace caffe
-#ifndef CAFFEINE_BLOB_HPP
-#define CAFFEINE_BLOB_HPP
+#ifndef CAFFE_BLOB_HPP_
+#define CAFFE_BLOB_HPP_
-#include "caffeine/common.hpp"
-#include "caffeine/syncedmem.hpp"
-#include "caffeine/proto/layer_param.pb.h"
+#include "caffe/common.hpp"
+#include "caffe/syncedmem.hpp"
+#include "caffe/proto/layer_param.pb.h"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
class Blob {
int count_;
}; // class Blob
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_BLOB_HPP_
+#endif // CAFFE_BLOB_HPP_
-#include "caffeine/common.hpp"
+#include "caffe/common.hpp"
-namespace caffeine {
+namespace caffe {
-shared_ptr<Caffeine> Caffeine::singleton_;
+shared_ptr<Caffe> Caffe::singleton_;
-Caffeine::Caffeine()
- : mode_(Caffeine::CPU), phase_(Caffeine::TRAIN) {
+Caffe::Caffe()
+ : mode_(Caffe::CPU), phase_(Caffe::TRAIN) {
CUBLAS_CHECK(cublasCreate(&cublas_handle_));
CURAND_CHECK(curandCreateGenerator(&curand_generator_,
CURAND_RNG_PSEUDO_DEFAULT));
VSL_CHECK(vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701));
}
-Caffeine::~Caffeine() {
+Caffe::~Caffe() {
if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
if (curand_generator_) {
  CURAND_CHECK(curandDestroyGenerator(curand_generator_));
}
if (vsl_stream_) VSL_CHECK(vslDeleteStream(&vsl_stream_));
};
-Caffeine& Caffeine::Get() {
+Caffe& Caffe::Get() {
if (!singleton_) {
- singleton_.reset(new Caffeine());
+ singleton_.reset(new Caffe());
}
return *singleton_;
};
-VSLStreamStatePtr Caffeine::vsl_stream() {
+VSLStreamStatePtr Caffe::vsl_stream() {
return Get().vsl_stream_;
}
-cublasHandle_t Caffeine::cublas_handle() {
+cublasHandle_t Caffe::cublas_handle() {
return Get().cublas_handle_;
};
-curandGenerator_t Caffeine::curand_generator() {
+curandGenerator_t Caffe::curand_generator() {
return Get().curand_generator_;
};
-Caffeine::Brew Caffeine::mode() {
+Caffe::Brew Caffe::mode() {
return Get().mode_;
}
-void Caffeine::set_mode(Caffeine::Brew mode) {
+void Caffe::set_mode(Caffe::Brew mode) {
Get().mode_ = mode;
}
-Caffeine::Phase Caffeine::phase() {
+Caffe::Phase Caffe::phase() {
return Get().phase_;
}
-void Caffeine::set_phase(Caffeine::Phase phase) {
+void Caffe::set_phase(Caffe::Phase phase) {
Get().phase_ = phase;
}
-void Caffeine::set_random_seed(const unsigned int seed) {
+void Caffe::set_random_seed(const unsigned int seed) {
// Curand seed
// Yangqing's note: simply setting the generator seed does not seem to
// work on the Tesla K20s, so I wrote the ugly reset thing below. It is not
VSL_CHECK(vslNewStream(&(Get().vsl_stream_), VSL_BRNG_MT19937, seed));
}
-} // namespace caffeine
+} // namespace caffe
-#ifndef CAFFEINE_COMMON_HPP_
-#define CAFFEINE_COMMON_HPP_
+#ifndef CAFFE_COMMON_HPP_
+#define CAFFE_COMMON_HPP_
#include <boost/shared_ptr.hpp>
#include <cublas_v2.h>
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"
-namespace caffeine {
+namespace caffe {
// We will use the boost shared_ptr instead of the new C++11 one mainly
// because CUDA does not (at least for now) work well with C++11 features.
using boost::shared_ptr;
// For backward compatibility we will just use 512 threads per block
-const int CAFFEINE_CUDA_NUM_THREADS = 512;
+const int CAFFE_CUDA_NUM_THREADS = 512;
-inline int CAFFEINE_GET_BLOCKS(const int N) {
- return (N + CAFFEINE_CUDA_NUM_THREADS - 1) / CAFFEINE_CUDA_NUM_THREADS;
+inline int CAFFE_GET_BLOCKS(const int N) {
+ return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
}
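// For example, N = 1000 gives (1000 + 511) / 512 = 2 blocks, i.e. 1024
// threads covering all N elements with one partially filled block; each
// kernel is expected to guard the overhang with an index check against N.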
-// A singleton class to hold common caffeine stuff, such as the handler that
-// caffeine is going to use for cublas.
-class Caffeine {
+// A singleton class to hold common caffe stuff, such as the handle that
+// caffe is going to use for cublas.
+class Caffe {
public:
- ~Caffeine();
- static Caffeine& Get();
+ ~Caffe();
+ static Caffe& Get();
enum Brew { CPU, GPU };
  enum Phase { TRAIN, TEST };
static void set_phase(Phase phase);
static void set_random_seed(const unsigned int seed);
private:
- Caffeine();
- static shared_ptr<Caffeine> singleton_;
+ Caffe();
+ static shared_ptr<Caffe> singleton_;
cublasHandle_t cublas_handle_;
curandGenerator_t curand_generator_;
VSLStreamStatePtr vsl_stream_;
Phase phase_;
};
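// A minimal usage sketch of the static interface above (all state lives in
// the lazily constructed singleton behind Get()):
//
//   Caffe::set_mode(Caffe::GPU);      // pick CPU or GPU execution
//   Caffe::set_phase(Caffe::TRAIN);   // pick TRAIN or TEST behavior
//   Caffe::set_random_seed(1701);     // reseeds curand and the VSL stream
//   cublasHandle_t handle = Caffe::cublas_handle();  // shared cublas handle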
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_COMMON_HPP_
+#endif // CAFFE_COMMON_HPP_
// algorithm. The expectation is that they are only going to be used during
// initialization time and will not involve any GPUs.
-#ifndef CAFFEINE_FILLER_HPP
-#define CAFFEINE_FILLER_HPP
+#ifndef CAFFE_FILLER_HPP_
+#define CAFFE_FILLER_HPP_
#include <mkl.h>
-#include "caffeine/common.hpp"
-#include "caffeine/blob.hpp"
-#include "caffeine/syncedmem.hpp"
-#include "caffeine/proto/layer_param.pb.h"
+#include "caffe/common.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/syncedmem.hpp"
+#include "caffe/proto/layer_param.pb.h"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
class Filler {
CHECK(count);
switch(sizeof(Dtype)) {
case sizeof(float):
- VSL_CHECK(vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffeine::vsl_stream(),
+ VSL_CHECK(vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(),
count, (float*)data, this->filler_param_.min(),
this->filler_param_.max()));
break;
case sizeof(double):
- VSL_CHECK(vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffeine::vsl_stream(),
+ VSL_CHECK(vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(),
count, (double*)data, this->filler_param_.min(),
this->filler_param_.max()));
break;
switch(sizeof(Dtype)) {
case sizeof(float):
VSL_CHECK(vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER,
- Caffeine::vsl_stream(), count, (float*)data,
+ Caffe::vsl_stream(), count, (float*)data,
this->filler_param_.mean(), this->filler_param_.std()));
break;
case sizeof(double):
VSL_CHECK(vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER,
- Caffeine::vsl_stream(), count, (double*)data,
+ Caffe::vsl_stream(), count, (double*)data,
this->filler_param_.mean(), this->filler_param_.std()));
break;
default:
return (Filler<Dtype>*)(NULL);
}
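// A hypothetical use of the filler factory whose tail is shown above (the
// factory function's name is elided from this hunk, and the proto setters
// are assumed to follow the usual generated-code convention):
//
//   FillerParameter filler_param;
//   filler_param.set_min(-1.);  // read back via filler_param_.min()
//   filler_param.set_max(1.);
//   shared_ptr<Filler<float> > filler(/* factory call on filler_param */);
//   filler->Fill(&blob);        // writes uniform samples into the blob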
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_FILLER_HPP_
+#endif // CAFFE_FILLER_HPP_
-#ifndef CAFFEINE_LAYER_H_
-#define CAFFEINE_LAYER_H_
+#ifndef CAFFE_LAYER_H_
+#define CAFFE_LAYER_H_
#include <vector>
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/proto/layer_param.pb.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/proto/layer_param.pb.h"
using std::vector;
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
class Layer {
template <typename Dtype>
inline void Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
- switch(Caffeine::mode()) {
- case Caffeine::CPU:
+ switch(Caffe::mode()) {
+ case Caffe::CPU:
Forward_cpu(bottom, top);
break;
- case Caffeine::GPU:
+ case Caffe::GPU:
Forward_gpu(bottom, top);
break;
default:
- LOG(FATAL) << "Unknown caffeine mode.";
+ LOG(FATAL) << "Unknown caffe mode.";
}
};
inline Dtype Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
vector<Blob<Dtype>*>* bottom) {
- switch(Caffeine::mode()) {
- case Caffeine::CPU:
+ switch(Caffe::mode()) {
+ case Caffe::CPU:
return Backward_cpu(top, propagate_down, bottom);
- case Caffeine::GPU:
+ case Caffe::GPU:
return Backward_gpu(top, propagate_down, bottom);
default:
- LOG(FATAL) << "Unknown caffeine mode.";
+ LOG(FATAL) << "Unknown caffe mode.";
}
};
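// Sketch of the resulting call sequence for a concrete layer type (blob
// setup elided; ReLULayer is one of the types exercised in the tests below):
//
//   Caffe::set_mode(Caffe::CPU);        // Forward/Backward dispatch on this
//   ReLULayer<float> layer(layer_param);
//   layer.SetUp(bottom_vec, &top_vec);
//   layer.Forward(bottom_vec, &top_vec);
//   float loss = layer.Backward(top_vec, true, &bottom_vec);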
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_LAYER_H_
+#endif // CAFFE_LAYER_H_
-#include "caffeine/layer.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/util/im2col.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/util/math_functions.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/im2col.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/util/math_functions.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
WIDTH_, KSIZE_, STRIDE_, col_data);
// Second, innerproduct with groups
for (int g = 0; g < GROUP_; ++g) {
- caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
(Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
(Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
}
// Third, add bias
if (biasterm_) {
- caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
N_, 1, (Dtype)1., this->blobs_[1].cpu_data(),
(Dtype*)bias_multiplier_->cpu_data(), (Dtype)1.,
top_data + (*top)[0]->offset(n));
WIDTH_, KSIZE_, STRIDE_, col_data);
// Second, innerproduct with groups
for (int g = 0; g < GROUP_; ++g) {
- caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
(Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
(Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
}
// Third, add bias
if (biasterm_) {
- caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
N_, 1, (Dtype)1., this->blobs_[1].gpu_data(),
(Dtype*)bias_multiplier_->gpu_data(), (Dtype)1.,
top_data + (*top)[0]->offset(n));
bias_diff = this->blobs_[1].mutable_cpu_diff();
memset(bias_diff, 0., sizeof(Dtype) * this->blobs_[1].count());
for (int n = 0; n < NUM_; ++n) {
- caffeine_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
1., top_diff + top[0]->offset(n),
(Dtype*)bias_multiplier_->cpu_data(), 1., bias_diff);
}
WIDTH_, KSIZE_, STRIDE_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < GROUP_; ++g) {
- caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
(Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
col_data + col_offset * g, (Dtype)1.,
weight_diff + weight_offset * g);
// gradient w.r.t. bottom data, if necessary
if (propagate_down) {
for (int g = 0; g < GROUP_; ++g) {
- caffeine_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
(Dtype)1., weight + weight_offset * g,
top_diff + top[0]->offset(n) + top_offset * g,
(Dtype)0., col_diff + col_offset * g);
CUDA_CHECK(cudaMemset(bias_diff, 0.,
sizeof(Dtype) * this->blobs_[1].count()));
for (int n = 0; n < NUM_; ++n) {
- caffeine_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
1., top_diff + top[0]->offset(n),
(Dtype*)bias_multiplier_->gpu_data(), 1., bias_diff);
}
WIDTH_, KSIZE_, STRIDE_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < GROUP_; ++g) {
- caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
(Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
col_data + col_offset * g, (Dtype)1.,
weight_diff + weight_offset * g);
// gradient w.r.t. bottom data, if necessary
if (propagate_down) {
for (int g = 0; g < GROUP_; ++g) {
- caffeine_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
(Dtype)1., weight + weight_offset * g,
top_diff + top[0]->offset(n) + top_offset * g,
(Dtype)0., col_diff + col_offset * g);
INSTANTIATE_CLASS(ConvolutionLayer);
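// Note on the grouped GEMMs above: for each group g, an M_ x K_ slice of the
// weights multiplies a K_ x N_ slice of the im2col buffer into an M_ x N_
// slice of the output; weight_offset, col_offset and top_offset advance the
// raw pointers from one group's slice to the next.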
-} // namespace caffeine
+} // namespace caffe
#include <algorithm>
#include <limits>
-#include "caffeine/common.hpp"
-#include "caffeine/layer.hpp"
-#include "caffeine/syncedmem.hpp"
-#include "caffeine/vision_layers.hpp"
+#include "caffe/common.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/syncedmem.hpp"
+#include "caffe/vision_layers.hpp"
using std::max;
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
Dtype* top_data = (*top)[0]->mutable_cpu_data();
int* mask = (int*)rand_vec_->mutable_cpu_data();
const int count = bottom[0]->count();
- if (Caffeine::phase() == Caffeine::TRAIN) {
+ if (Caffe::phase() == Caffe::TRAIN) {
// Create random numbers
- viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffeine::vsl_stream(),
+ viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(),
count, mask, 1. - threshold_);
for (int i = 0; i < count; ++i) {
top_data[i] = bottom_data[i] * mask[i] * scale_;
Dtype DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
vector<Blob<Dtype>*>* bottom) {
- CHECK(Caffeine::phase() == Caffeine::TRAIN);
+ CHECK(Caffe::phase() == Caffe::TRAIN);
if (propagate_down) {
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
const int count = bottom[0]->count();
- if (Caffeine::phase() == Caffeine::TRAIN) {
- CURAND_CHECK(curandGenerate(Caffeine::curand_generator(),
+ if (Caffe::phase() == Caffe::TRAIN) {
+ CURAND_CHECK(curandGenerate(Caffe::curand_generator(),
(unsigned int*)(rand_vec_->mutable_gpu_data()), count));
// set thresholds
- DropoutForward<Dtype><<<CAFFEINE_GET_BLOCKS(count), CAFFEINE_CUDA_NUM_THREADS>>>(
+ DropoutForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, (unsigned int*)rand_vec_->gpu_data(), uint_thres_, scale_,
top_data);
CUDA_POST_KERNEL_CHECK;
Dtype DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down,
vector<Blob<Dtype>*>* bottom) {
- CHECK(Caffeine::phase() == Caffeine::TRAIN);
+ CHECK(Caffe::phase() == Caffe::TRAIN);
if (propagate_down) {
const Dtype* top_diff = top[0]->gpu_diff();
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
const unsigned int* mask = (unsigned int*)rand_vec_->gpu_data();
const int count = (*bottom)[0]->count();
- DropoutBackward<Dtype><<<CAFFEINE_GET_BLOCKS(count), CAFFEINE_CUDA_NUM_THREADS>>>(
+ DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, uint_thres_, scale_, bottom_diff);
CUDA_POST_KERNEL_CHECK;
}
INSTANTIATE_CLASS(DropoutLayer);
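// Note: during TRAIN the layer draws a Bernoulli(1 - threshold_) keep mask
// and multiplies the survivors by scale_ (presumably 1 / (1 - threshold_);
// its setup is elided above), so the expected activation matches the TEST
// phase, whose pass-through path is likewise elided from this hunk.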
-} // namespace caffeine
+} // namespace caffe
-#include "caffeine/layer.hpp"
-#include "caffeine/util/im2col.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/common.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/util/im2col.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/common.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void Im2colLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
INSTANTIATE_CLASS(Im2colLayer);
-} // namespace caffeine
+} // namespace caffe
#include <mkl.h>
#include <cublas_v2.h>
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/layer.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/util/math_functions.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
const Dtype* weight = this->blobs_[0].cpu_data();
- caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, (Dtype)1.,
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, (Dtype)1.,
bottom_data, weight, (Dtype)0., top_data);
if (biasterm_) {
- caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
(Dtype*)bias_multiplier_->cpu_data(), this->blobs_[1].cpu_data(),
(Dtype)1., top_data);
}
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* bottom_data = (*bottom)[0]->cpu_data();
// Gradient with respect to weight
- caffeine_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, (Dtype)1.,
+ caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, (Dtype)1.,
bottom_data, top_diff, (Dtype)0., this->blobs_[0].mutable_cpu_diff());
if (biasterm_) {
// Gradient with respect to bias
- caffeine_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
+ caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
(Dtype*)bias_multiplier_->cpu_data(), (Dtype)0.,
this->blobs_[1].mutable_cpu_diff());
}
if (propagate_down) {
// Gradient with respect to bottom data
- caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, (Dtype)1.,
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, (Dtype)1.,
top_diff, this->blobs_[0].cpu_data(), (Dtype)0.,
(*bottom)[0]->mutable_cpu_diff());
}
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
const Dtype* weight = this->blobs_[0].gpu_data();
- caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, (Dtype)1.,
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, (Dtype)1.,
bottom_data, weight, (Dtype)0., top_data);
if (biasterm_) {
- caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
(Dtype*)bias_multiplier_->gpu_data(), this->blobs_[1].gpu_data(),
(Dtype)1., top_data);
}
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* bottom_data = (*bottom)[0]->gpu_data();
// Gradient with respect to weight
- caffeine_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, (Dtype)1.,
+ caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, (Dtype)1.,
bottom_data, top_diff, (Dtype)0., this->blobs_[0].mutable_gpu_diff());
if (biasterm_) {
// Gradient with respect to bias
- caffeine_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
+ caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
(Dtype*)bias_multiplier_->gpu_data(), (Dtype)0.,
this->blobs_[1].mutable_gpu_diff());
}
if (propagate_down) {
// Gradient with respect to bottom data
- caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, (Dtype)1.,
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, (Dtype)1.,
top_diff, this->blobs_[0].gpu_data(), (Dtype)0.,
(*bottom)[0]->mutable_gpu_diff());
}
INSTANTIATE_CLASS(InnerProductLayer);
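// Note on the GEMMs above: forward computes top = bottom * weight with
// bottom of shape M_ x K_ (batch x input dim) and weight K_ x N_ (input dim
// x output dim); the bias term adds the outer product of bias_multiplier_
// (an M_ x 1 vector, presumably all ones) with the 1 x N_ bias blob.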
-} // namespace caffeine
+} // namespace caffe
-#include "caffeine/layer.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/util/math_functions.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void LRNLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
// go through the images
for (int n = 0; n < num_; ++n) {
// compute the padded square
- caffeine_sqr(channels_ * height_ * width_,
+ caffe_sqr(channels_ * height_ * width_,
bottom_data + bottom[0]->offset(n),
padded_square_data + padded_square.offset(0, pre_pad_));
// Create the first channel scale
for (int c = 0; c < size_; ++c) {
- caffeine_axpy<Dtype>(height_ * width_, alpha_over_size,
+ caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
padded_square_data + padded_square.offset(0, c),
scale_data + scale_.offset(n, 0));
}
for (int c = 1; c < channels_; ++c) {
// copy previous scale
- caffeine_copy<Dtype>(height_ * width_,
+ caffe_copy<Dtype>(height_ * width_,
scale_data + scale_.offset(n, c - 1),
scale_data + scale_.offset(n, c));
// add head
- caffeine_axpy<Dtype>(height_ * width_, alpha_over_size,
+ caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
padded_square_data + padded_square.offset(0, c + size_ - 1),
scale_data + scale_.offset(n, c));
// subtract tail
- caffeine_axpy<Dtype>(height_ * width_, -alpha_over_size,
+ caffe_axpy<Dtype>(height_ * width_, -alpha_over_size,
padded_square_data + padded_square.offset(0, c - 1),
scale_data + scale_.offset(n, c));
}
}
// In the end, compute output
- caffeine_powx<Dtype>(scale_.count(), scale_data, -beta_, top_data);
- caffeine_mul<Dtype>(scale_.count(), top_data, bottom_data, top_data);
+ caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, top_data);
+ caffe_mul<Dtype>(scale_.count(), top_data, bottom_data, top_data);
}
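// The channel loop above keeps scale_data as a sliding-window running sum of
// squared activations: each step copies the previous channel's scale map,
// adds the entering channel (c + size_ - 1) and subtracts the leaving one
// (c - 1), costing a copy and two axpy calls per channel instead of size_
// accumulations.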
template <typename Dtype>
memset(padded_ratio_data, 0, sizeof(Dtype) * padded_ratio.count());
Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_;
- caffeine_powx<Dtype>(scale_.count(), scale_data, -beta_, bottom_diff);
- caffeine_mul<Dtype>(scale_.count(), top_diff, bottom_diff, bottom_diff);
+ caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, bottom_diff);
+ caffe_mul<Dtype>(scale_.count(), top_diff, bottom_diff, bottom_diff);
// go through individual data
int inverse_pre_pad = size_ - (size_ + 1) / 2;
for (int n = 0; n < num_; ++n) {
int block_offset = scale_.offset(n);
// first, compute diff_i * y_i / s_i
- caffeine_mul<Dtype>(channels_ * height_ * width_,
+ caffe_mul<Dtype>(channels_ * height_ * width_,
top_diff + block_offset, top_data + block_offset,
padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad));
- caffeine_div<Dtype>(channels_ * height_ * width_,
+ caffe_div<Dtype>(channels_ * height_ * width_,
padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad),
scale_data + block_offset,
padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad));
// Now, compute the accumulated ratios and the bottom diff
memset(accum_ratio_data, 0, sizeof(Dtype) * accum_ratio.count());
for (int c = 0; c < size_ - 1; ++c) {
- caffeine_axpy<Dtype>(height_ * width_, 1.,
+ caffe_axpy<Dtype>(height_ * width_, 1.,
padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data);
}
for (int c = 0; c < channels_; ++c) {
- caffeine_axpy<Dtype>(height_ * width_, 1.,
+ caffe_axpy<Dtype>(height_ * width_, 1.,
padded_ratio_data + padded_ratio.offset(0, c + size_ - 1),
accum_ratio_data);
// compute bottom diff
- caffeine_mul<Dtype>(height_ * width_,
+ caffe_mul<Dtype>(height_ * width_,
bottom_data + top[0]->offset(n, c),
accum_ratio_data, accum_ratio_times_bottom);
- caffeine_axpy<Dtype>(height_ * width_, -cache_ratio_value,
+ caffe_axpy<Dtype>(height_ * width_, -cache_ratio_value,
accum_ratio_times_bottom, bottom_diff + top[0]->offset(n,c));
- caffeine_axpy<Dtype>(height_ * width_, -1.,
+ caffe_axpy<Dtype>(height_ * width_, -1.,
padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data);
}
}
INSTANTIATE_CLASS(LRNLayer);
-} // namespace caffeine
+} // namespace caffe
-#include "caffeine/layer.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/util/math_functions.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
__global__ void LRNFillScale(const int nthreads, const Dtype* in,
// We will launch one thread for each pixel location, and have each thread
// go through all the channels.
int n_threads = num_ * height_ * width_;
- LRNFillScale<<<CAFFEINE_GET_BLOCKS(n_threads), CAFFEINE_CUDA_NUM_THREADS>>>(
+ LRNFillScale<<<CAFFE_GET_BLOCKS(n_threads), CAFFE_CUDA_NUM_THREADS>>>(
n_threads, bottom_data, num_, channels_, height_, width_, size_,
alpha_ / size_, scale_data);
CUDA_POST_KERNEL_CHECK;
n_threads = bottom[0]->count();
- LRNComputeOutput<<<CAFFEINE_GET_BLOCKS(n_threads), CAFFEINE_CUDA_NUM_THREADS>>>(
+ LRNComputeOutput<<<CAFFE_GET_BLOCKS(n_threads), CAFFE_CUDA_NUM_THREADS>>>(
n_threads, bottom_data, scale_data, -beta_, top_data);
CUDA_POST_KERNEL_CHECK;
}
Dtype LRNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
int n_threads = num_ * height_ * width_;
- LRNComputeDiff<<<CAFFEINE_GET_BLOCKS(n_threads), CAFFEINE_CUDA_NUM_THREADS>>>(
+ LRNComputeDiff<<<CAFFE_GET_BLOCKS(n_threads), CAFFE_CUDA_NUM_THREADS>>>(
n_threads, (*bottom)[0]->gpu_data(), top[0]->gpu_data(),
scale_.gpu_data(), top[0]->gpu_diff(), num_, channels_, height_, width_,
size_, -beta_, Dtype(2. * alpha_ * beta_ / size_),
INSTANTIATE_CLASS(LRNLayer);
-} // namespace caffeine
+} // namespace caffe
-#include "caffeine/layer.hpp"
-#include "caffeine/vision_layers.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void NeuronLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
INSTANTIATE_CLASS(NeuronLayer);
-} // namespace caffeine
+} // namespace caffe
-#include "caffeine/layer.hpp"
-#include "caffeine/vision_layers.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
#include <iostream>
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void PaddingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
const int count = bottom[0]->count();
// First, set all data to be zero for the boundary pixels
CUDA_CHECK(cudaMemset(top_data, 0, sizeof(Dtype) * (*top)[0]->count()));
- PaddingForward<Dtype><<<CAFFEINE_GET_BLOCKS(count), CAFFEINE_CUDA_NUM_THREADS>>>(
+ PaddingForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, top_data, NUM_, CHANNEL_, HEIGHT_IN_, WIDTH_IN_,
PAD_);
CUDA_POST_KERNEL_CHECK;
const Dtype* top_diff = top[0]->gpu_diff();
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
const int count = (*bottom)[0]->count();
- PaddingBackward<Dtype><<<CAFFEINE_GET_BLOCKS(count), CAFFEINE_CUDA_NUM_THREADS>>>(
+ PaddingBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, bottom_diff, NUM_, CHANNEL_, HEIGHT_IN_, WIDTH_IN_,
PAD_);
CUDA_POST_KERNEL_CHECK;
INSTANTIATE_CLASS(PaddingLayer);
-} // namespace caffeine
+} // namespace caffe
-#include "caffeine/layer.hpp"
-#include "caffeine/vision_layers.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
#include <algorithm>
using std::max;
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
const int count = bottom[0]->count();
- ReLUForward<Dtype><<<CAFFEINE_GET_BLOCKS(count), CAFFEINE_CUDA_NUM_THREADS>>>(
+ ReLUForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, top_data);
}
const Dtype* top_diff = top[0]->gpu_diff();
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
const int count = (*bottom)[0]->count();
- ReLUBackward<Dtype><<<CAFFEINE_GET_BLOCKS(count), CAFFEINE_CUDA_NUM_THREADS>>>(
+ ReLUBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, bottom_data, bottom_diff);
}
return Dtype(0);
INSTANTIATE_CLASS(ReLULayer);
-} // namespace caffeine
+} // namespace caffe
-package caffeine;
+package caffe;
message BlobProto {
optional int32 num = 1 [default = 0];
#include <cstring>
#include <cuda_runtime.h>
-#include "caffeine/common.hpp"
-#include "caffeine/syncedmem.hpp"
+#include "caffe/common.hpp"
+#include "caffe/syncedmem.hpp"
-namespace caffeine {
+namespace caffe {
SyncedMemory::~SyncedMemory() {
if (cpu_ptr_) {
}
-} // namespace caffeine
+} // namespace caffe
-#ifndef CAFFEINE_SYNCEDMEM_HPP
-#define CAFFEINE_SYNCEDMEM_HPP
+#ifndef CAFFE_SYNCEDMEM_HPP_
+#define CAFFE_SYNCEDMEM_HPP_
-namespace caffeine {
+namespace caffe {
class SyncedMemory {
public:
SyncedHead head_;
}; // class SyncedMemory
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_SYNCEDMEM_HPP_
+#endif // CAFFE_SYNCEDMEM_HPP_
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/common.hpp"
-#include "caffeine/blob.hpp"
-#include "caffeine/filler.hpp"
+#include "caffe/common.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/filler.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
class BlobSimpleTest : public ::testing::Test {
-// The main caffeine test code. Your test cpp code should include this hpp
+// The main caffe test code. Your test cpp code should include this hpp
// to allow a main function to be compiled into the binary.
-#ifndef CAFFEINE_TEST_TEST_CAFFEINE_MAIN_HPP_
-#define CAFFEINE_TEST_TEST_CAFFEINE_MAIN_HPP_
+#ifndef CAFFE_TEST_TEST_CAFFE_MAIN_HPP_
+#define CAFFE_TEST_TEST_CAFFE_MAIN_HPP_
#include <cstdlib>
#include <cstdio>
#include <glog/logging.h>
#include <gtest/gtest.h>
-namespace caffeine {
+namespace caffe {
-cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+cudaDeviceProp CAFFE_TEST_CUDA_PROP;
-} // namespace caffeine
+} // namespace caffe
-using namespace caffeine;
+using namespace caffe;
using namespace std;
int main(int argc, char** argv) {
}
cudaGetDevice(&device);
cout << "Current device id: " << device << endl;
- cudaGetDeviceProperties(&CAFFEINE_TEST_CUDA_PROP, device);
+ cudaGetDeviceProperties(&CAFFE_TEST_CUDA_PROP, device);
// invoke the test.
return RUN_ALL_TESTS();
}
-#endif // CAFFEINE_TEST_TEST_CAFFEINE_MAIN_HPP_
+#endif // CAFFE_TEST_TEST_CAFFE_MAIN_HPP_
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/common.hpp"
-#include "caffeine/syncedmem.hpp"
+#include "caffe/common.hpp"
+#include "caffe/syncedmem.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
class CommonTest : public ::testing::Test {};
TEST_F(CommonTest, TestCublasHandler) {
int cuda_device_id;
CUDA_CHECK(cudaGetDevice(&cuda_device_id));
- EXPECT_TRUE(Caffeine::cublas_handle());
+ EXPECT_TRUE(Caffe::cublas_handle());
}
TEST_F(CommonTest, TestVslStream) {
- EXPECT_TRUE(Caffeine::vsl_stream());
+ EXPECT_TRUE(Caffe::vsl_stream());
}
TEST_F(CommonTest, TestBrewMode) {
- EXPECT_EQ(Caffeine::mode(), Caffeine::CPU);
- Caffeine::set_mode(Caffeine::GPU);
- EXPECT_EQ(Caffeine::mode(), Caffeine::GPU);
+ EXPECT_EQ(Caffe::mode(), Caffe::CPU);
+ Caffe::set_mode(Caffe::GPU);
+ EXPECT_EQ(Caffe::mode(), Caffe::GPU);
}
TEST_F(CommonTest, TestPhase) {
- EXPECT_EQ(Caffeine::phase(), Caffeine::TRAIN);
- Caffeine::set_phase(Caffeine::TEST);
- EXPECT_EQ(Caffeine::phase(), Caffeine::TEST);
+ EXPECT_EQ(Caffe::phase(), Caffe::TRAIN);
+ Caffe::set_phase(Caffe::TEST);
+ EXPECT_EQ(Caffe::phase(), Caffe::TEST);
}
TEST_F(CommonTest, TestRandSeedCPU) {
SyncedMemory data_a(10 * sizeof(int));
SyncedMemory data_b(10 * sizeof(int));
- Caffeine::set_random_seed(1701);
- viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffeine::vsl_stream(),
+ Caffe::set_random_seed(1701);
+ viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(),
10, (int*)data_a.mutable_cpu_data(), 0.5);
- Caffeine::set_random_seed(1701);
- viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffeine::vsl_stream(),
+ Caffe::set_random_seed(1701);
+ viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(),
10, (int*)data_b.mutable_cpu_data(), 0.5);
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(((const int*)(data_a.cpu_data()))[i],
TEST_F(CommonTest, TestRandSeedGPU) {
SyncedMemory data_a(10 * sizeof(unsigned int));
SyncedMemory data_b(10 * sizeof(unsigned int));
- Caffeine::set_random_seed(1701);
- CURAND_CHECK(curandGenerate(Caffeine::curand_generator(),
+ Caffe::set_random_seed(1701);
+ CURAND_CHECK(curandGenerate(Caffe::curand_generator(),
(unsigned int*)data_a.mutable_gpu_data(), 10));
- Caffeine::set_random_seed(1701);
- CURAND_CHECK(curandGenerate(Caffeine::curand_generator(),
+ Caffe::set_random_seed(1701);
+ CURAND_CHECK(curandGenerate(Caffe::curand_generator(),
(unsigned int*)data_b.mutable_gpu_data(), 10));
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(((const unsigned int*)(data_a.cpu_data()))[i],
}
-} // namespace caffeine
+} // namespace caffe
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/test/test_gradient_check_util.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
template <typename Dtype>
class ConvolutionLayerTest : public ::testing::Test {
shared_ptr<Layer<TypeParam> > layer(
new ConvolutionLayer<TypeParam>(layer_param));
layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// After the convolution, the output values should all be 27.1
const TypeParam* top_data = this->blob_top_->cpu_data();
EXPECT_LE(top_data[i], 27.1 + 1e-4);
}
// Test GPU
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// After the convolution, the output values should all be 27.1
top_data = this->blob_top_->cpu_data();
shared_ptr<Layer<TypeParam> > layer(
new ConvolutionLayer<TypeParam>(layer_param));
layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// After the convolution, the output values should all be 9.1
const TypeParam* top_data = this->blob_top_->cpu_data();
EXPECT_LE(top_data[i], 9.1 + 1e-4);
}
// Test GPU
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// After the convolution, the output values should all be 9.1
top_data = this->blob_top_->cpu_data();
layer_param.set_kernelsize(3);
layer_param.set_stride(2);
layer_param.set_num_output(2);
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
ConvolutionLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
layer_param.set_kernelsize(3);
layer_param.set_stride(2);
layer_param.set_num_output(2);
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
ConvolutionLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/filler.hpp"
+#include "caffe/filler.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
typedef ::testing::Types<float, double> Dtypes;
-#ifndef CAFFEINE_TEST_GRADIENT_CHECK_UTIL_H_
-#define CAFFEINE_TEST_GRADIENT_CHECK_UTIL_H_
+#ifndef CAFFE_TEST_GRADIENT_CHECK_UTIL_H_
+#define CAFFE_TEST_GRADIENT_CHECK_UTIL_H_
#include <algorithm>
#include <cmath>
#include <glog/logging.h>
#include <gtest/gtest.h>
-#include "caffeine/layer.hpp"
+#include "caffe/layer.hpp"
using std::max;
-namespace caffeine {
+namespace caffe {
// The gradient checker adds a L2 normalization loss function on top of the
// top blobs, and checks the gradient.
// go through the values
for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
// First, obtain the original data
- Caffeine::set_random_seed(seed_);
+ Caffe::set_random_seed(seed_);
layer.Forward(bottom, &top);
Dtype computed_objective = GetObjAndGradient(top, top_id, top_data_id);
// Get any additional loss from the layer
Dtype computed_gradient = current_blob->cpu_diff()[feat_id];
// compute score by adding stepsize
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
- Caffeine::set_random_seed(seed_);
+ Caffe::set_random_seed(seed_);
layer.Forward(bottom, &top);
Dtype positive_objective = GetObjAndGradient(top, top_id, top_data_id);
positive_objective += layer.Backward(top, true, &bottom);
// compute score by subtracting stepsize
current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
- Caffeine::set_random_seed(seed_);
+ Caffe::set_random_seed(seed_);
layer.Forward(bottom, &top);
Dtype negative_objective = GetObjAndGradient(top, top_id, top_data_id);
negative_objective += layer.Backward(top, true, &bottom);
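// (The comparison itself is elided from this hunk: the numeric gradient is
// presumably the centered difference
// (positive_objective - negative_objective) / (2 * stepsize_), checked
// against computed_gradient within the configured threshold.)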
return loss;
}
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_TEST_GRADIENT_CHECK_UTIL_H_
+#endif // CAFFE_TEST_GRADIENT_CHECK_UTIL_H_
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/test/test_gradient_check_util.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
template <typename Dtype>
class Im2colLayerTest : public ::testing::Test {
layer_param.set_kernelsize(3);
layer_param.set_stride(2);
Im2colLayer<TypeParam> layer(layer_param);
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// We are lazy and will only check the top left block
layer_param.set_kernelsize(3);
layer_param.set_stride(2);
Im2colLayer<TypeParam> layer(layer_param);
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// We are lazy and will only check the top left block
LayerParameter layer_param;
layer_param.set_kernelsize(3);
layer_param.set_stride(2);
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
Im2colLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
LayerParameter layer_param;
layer_param.set_kernelsize(3);
layer_param.set_stride(2);
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
Im2colLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/test/test_gradient_check_util.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
template <typename Dtype>
class InnerProductLayerTest : public ::testing::Test {
TYPED_TEST(InnerProductLayerTest, TestCPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
layer_param.set_num_output(10);
layer_param.mutable_weight_filler()->set_type("uniform");
layer_param.mutable_bias_filler()->set_type("uniform");
}
TYPED_TEST(InnerProductLayerTest, TestGPU) {
- if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+ if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
layer_param.set_num_output(10);
layer_param.mutable_weight_filler()->set_type("uniform");
layer_param.mutable_bias_filler()->set_type("uniform");
TYPED_TEST(InnerProductLayerTest, TestCPUGradient) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
layer_param.set_num_output(10);
layer_param.mutable_weight_filler()->set_type("uniform");
layer_param.mutable_bias_filler()->set_type("uniform");
}
TYPED_TEST(InnerProductLayerTest, TestGPUGradient) {
- if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+ if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
layer_param.set_num_output(10);
layer_param.mutable_weight_filler()->set_type("uniform");
layer_param.mutable_bias_filler()->set_type("uniform");
#include <iostream>
#include "gtest/gtest.h"
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/test/test_gradient_check_util.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
using std::min;
using std::max;
-namespace caffeine {
+namespace caffe {
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
template <typename Dtype>
class LRNLayerTest : public ::testing::Test {
: blob_bottom_(new Blob<Dtype>()),
blob_top_(new Blob<Dtype>()) {};
virtual void SetUp() {
- Caffeine::set_random_seed(1701);
+ Caffe::set_random_seed(1701);
blob_bottom_->Reshape(2, 7, 3, 3);
// fill the values
FillerParameter filler_param;
TYPED_TEST(LRNLayerTest, TestCPUForward) {
LayerParameter layer_param;
LRNLayer<TypeParam> layer(layer_param);
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
Blob<TypeParam> top_reference;
TYPED_TEST(LRNLayerTest, TestGPUForward) {
LayerParameter layer_param;
LRNLayer<TypeParam> layer(layer_param);
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
Blob<TypeParam> top_reference;
LayerParameter layer_param;
LRNLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
for (int i = 0; i < this->blob_top_->count(); ++i) {
LayerParameter layer_param;
LRNLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
for (int i = 0; i < this->blob_top_->count(); ++i) {
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/test/test_gradient_check_util.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
template <typename Dtype>
class NeuronLayerTest : public ::testing::Test {
TYPED_TEST(NeuronLayerTest, TestReLUCPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
ReLULayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
TYPED_TEST(NeuronLayerTest, TestReLUGradientCPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
ReLULayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
TYPED_TEST(NeuronLayerTest, TestReLUGPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
ReLULayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
TYPED_TEST(NeuronLayerTest, TestReLUGradientGPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
ReLULayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
TYPED_TEST(NeuronLayerTest, TestDropoutCPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::CPU);
- Caffeine::set_phase(Caffeine::TRAIN);
+ Caffe::set_mode(Caffe::CPU);
+ Caffe::set_phase(Caffe::TRAIN);
DropoutLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
TYPED_TEST(NeuronLayerTest, TestDropoutGradientCPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
DropoutLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
TYPED_TEST(NeuronLayerTest, TestDropoutCPUTestPhase) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::CPU);
- Caffeine::set_phase(Caffeine::TEST);
+ Caffe::set_mode(Caffe::CPU);
+ Caffe::set_phase(Caffe::TEST);
DropoutLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
TYPED_TEST(NeuronLayerTest, TestDropoutGPU) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::GPU);
- Caffeine::set_phase(Caffeine::TRAIN);
+ Caffe::set_mode(Caffe::GPU);
+ Caffe::set_phase(Caffe::TRAIN);
DropoutLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
TYPED_TEST(NeuronLayerTest, TestDropoutGradientGPU) {
- if (CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+ if (CAFFE_TEST_CUDA_PROP.major >= 2) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
DropoutLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
// it is too expensive to call curand multiple times, so we don't do an
TYPED_TEST(NeuronLayerTest, TestDropoutGPUTestPhase) {
LayerParameter layer_param;
- Caffeine::set_mode(Caffeine::GPU);
- Caffeine::set_phase(Caffeine::TEST);
+ Caffe::set_mode(Caffe::GPU);
+ Caffe::set_phase(Caffe::TEST);
DropoutLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/blob.hpp"
-#include "caffeine/common.hpp"
-#include "caffeine/filler.hpp"
-#include "caffeine/vision_layers.hpp"
-#include "caffeine/test/test_gradient_check_util.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
template <typename Dtype>
class PaddingLayerTest : public ::testing::Test {
TYPED_TEST(PaddingLayerTest, TestCPU) {
LayerParameter layer_param;
layer_param.set_pad(1);
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
PaddingLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
TYPED_TEST(PaddingLayerTest, TestCPUGrad) {
LayerParameter layer_param;
layer_param.set_pad(1);
- Caffeine::set_mode(Caffeine::CPU);
+ Caffe::set_mode(Caffe::CPU);
PaddingLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
}
TYPED_TEST(PaddingLayerTest, TestGPU) {
- if (CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+ if (CAFFE_TEST_CUDA_PROP.major >= 2) {
LayerParameter layer_param;
layer_param.set_pad(1);
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
PaddingLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
}
TYPED_TEST(PaddingLayerTest, TestGPUGrad) {
- if (CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+ if (CAFFE_TEST_CUDA_PROP.major >= 2) {
LayerParameter layer_param;
layer_param.set_pad(1);
- Caffeine::set_mode(Caffeine::GPU);
+ Caffe::set_mode(Caffe::GPU);
PaddingLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
--- /dev/null
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+
+#include <cuda_runtime.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+class PlatformTest : public ::testing::Test {};
+
+TEST_F(PlatformTest, TestInitialization) {
+ printf("Major revision number: %d\n", CAFFE_TEST_CUDA_PROP.major);
+ printf("Minor revision number: %d\n", CAFFE_TEST_CUDA_PROP.minor);
+ printf("Name: %s\n", CAFFE_TEST_CUDA_PROP.name);
+ printf("Total global memory: %lu\n", CAFFE_TEST_CUDA_PROP.totalGlobalMem);
+ printf("Total shared memory per block: %lu\n", CAFFE_TEST_CUDA_PROP.sharedMemPerBlock);
+ printf("Total registers per block: %d\n", CAFFE_TEST_CUDA_PROP.regsPerBlock);
+ printf("Warp size: %d\n", CAFFE_TEST_CUDA_PROP.warpSize);
+ printf("Maximum memory pitch: %lu\n", CAFFE_TEST_CUDA_PROP.memPitch);
+ printf("Maximum threads per block: %d\n", CAFFE_TEST_CUDA_PROP.maxThreadsPerBlock);
+ for (int i = 0; i < 3; ++i)
+ printf("Maximum dimension %d of block: %d\n", i, CAFFE_TEST_CUDA_PROP.maxThreadsDim[i]);
+ for (int i = 0; i < 3; ++i)
+ printf("Maximum dimension %d of grid: %d\n", i, CAFFE_TEST_CUDA_PROP.maxGridSize[i]);
+ printf("Clock rate: %d\n", CAFFE_TEST_CUDA_PROP.clockRate);
+ printf("Total constant memory: %lu\n", CAFFE_TEST_CUDA_PROP.totalConstMem);
+ printf("Texture alignment: %lu\n", CAFFE_TEST_CUDA_PROP.textureAlignment);
+ printf("Concurrent copy and execution: %s\n", (CAFFE_TEST_CUDA_PROP.deviceOverlap ? "Yes" : "No"));
+ printf("Number of multiprocessors: %d\n", CAFFE_TEST_CUDA_PROP.multiProcessorCount);
+ printf("Kernel execution timeout: %s\n", (CAFFE_TEST_CUDA_PROP.kernelExecTimeoutEnabled ? "Yes" : "No"));
+ EXPECT_TRUE(true);
+}
+
+} // namespace caffe
#include <google/protobuf/text_format.h>
#include "gtest/gtest.h"
-#include "caffeine/test/test_caffeine_main.hpp"
-#include "caffeine/proto/layer_param.pb.h"
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/proto/layer_param.pb.h"
-namespace caffeine {
+namespace caffe {
class ProtoTest : public ::testing::Test {};
#include <cuda_runtime.h>
#include "gtest/gtest.h"
-#include "caffeine/common.hpp"
-#include "caffeine/syncedmem.hpp"
+#include "caffe/common.hpp"
+#include "caffe/syncedmem.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
class SyncedMemoryTest : public ::testing::Test {};
#include <cublas_v2.h>
#include "gtest/gtest.h"
-#include "caffeine/blob.hpp"
-#include "caffeine/util/math_functions.hpp"
+#include "caffe/blob.hpp"
+#include "caffe/util/math_functions.hpp"
-#include "caffeine/test/test_caffeine_main.hpp"
+#include "caffe/test/test_caffe_main.hpp"
-namespace caffeine {
+namespace caffe {
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
typedef ::testing::Types<float, double> Dtypes;
memcpy(A.mutable_cpu_data(), data, 6 * sizeof(TypeParam));
memcpy(B.mutable_cpu_data(), data, 12 * sizeof(TypeParam));
- if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+ if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) {
// [1,2,3; 4,5,6] * [1,2,3,4; 5,6,7,8; 9,10,11,12]
- caffeine_cpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
+ caffe_cpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
}
- caffeine_gpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
+ caffe_gpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
// Test when we have a transposed A
A.Reshape(1,1,3,2);
memcpy(A.mutable_cpu_data(), A_reshape_data, 6 * sizeof(TypeParam));
- caffeine_cpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
+ caffe_cpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
}
- caffeine_gpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
+ caffe_gpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
// Test when we have a transposed A and a transposed B too
B.Reshape(1,1,4,3);
memcpy(B.mutable_cpu_data(), B_reshape_data, 12 * sizeof(TypeParam));
- caffeine_cpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
+ caffe_cpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
}
- caffeine_gpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
+ caffe_gpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
// Test when we have a transposed B
A.Reshape(1,1,2,3);
memcpy(A.mutable_cpu_data(), data, 6 * sizeof(TypeParam));
- caffeine_cpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
+ caffe_cpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
}
- caffeine_gpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
+ caffe_gpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(C.cpu_data()[i], result[i]);
memcpy(A.mutable_cpu_data(), data, 6 * sizeof(TypeParam));
memcpy(x.mutable_cpu_data(), data, 3 * sizeof(TypeParam));
- if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) {
- caffeine_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
+ if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) {
+ caffe_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
x.cpu_data(), 0., y.mutable_cpu_data());
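// Hand-checked: with A = [1, 2, 3; 4, 5, 6] and x = [1, 2, 3],
// y = A * x = [1 + 4 + 9, 4 + 10 + 18] = [14, 32], which is what
// result_2 should hold.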
for (int i = 0; i < 2; ++i) {
EXPECT_EQ(y.cpu_data()[i], result_2[i]);
}
- caffeine_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
+ caffe_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
x.gpu_data(), 0., y.mutable_gpu_data());
for (int i = 0; i < 2; ++i) {
EXPECT_EQ(y.cpu_data()[i], result_2[i]);
// Test transpose case
memcpy(y.mutable_cpu_data(), data, 2 * sizeof(TypeParam));
- caffeine_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
+ caffe_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
y.cpu_data(), 0., x.mutable_cpu_data());
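// Hand-checked for the transposed case (y was just set to [1, 2]):
// x = A^T * y = [1 + 8, 2 + 10, 3 + 12] = [9, 12, 15], which is what
// result_3 should hold.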
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(x.cpu_data()[i], result_3[i]);
}
- caffeine_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
+ caffe_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
y.gpu_data(), 0., x.mutable_gpu_data());
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(x.cpu_data()[i], result_3[i]);
#include <cstdlib>
#include <cstring>
-#include "caffeine/util/im2col.hpp"
+#include "caffe/util/im2col.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int psize, const int stride,
double* data_im);
-} // namespace caffeine
+} // namespace caffe
#include <cstdlib>
#include <cstring>
-#include "caffeine/common.hpp"
-#include "caffeine/util/im2col.hpp"
+#include "caffe/common.hpp"
+#include "caffe/util/im2col.hpp"
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
__global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
int height_col = (height - ksize) / stride + 1;
int width_col = (width - ksize) / stride + 1;
int num_kernels = channels * height_col * width_col;
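// A worked size check (illustrative values, not from the calling code):
// height = width = 5, ksize = 3, stride = 2 gives
// height_col = width_col = (5 - 3) / 2 + 1 = 2, so num_kernels is
// 4 * channels and each thread below handles one (channel, output position)
// pair, copying its ksize x ksize patch into the column buffer.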
- im2col_gpu_kernel<Dtype><<<CAFFEINE_GET_BLOCKS(num_kernels), CAFFEINE_CUDA_NUM_THREADS>>>(
+ im2col_gpu_kernel<Dtype><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, height, width, ksize, stride, height_col, width_col,
data_col);
CUDA_POST_KERNEL_CHECK;
int num_kernels = channels * height * width;
// To avoid involving atomic operations, we will launch one thread per
// bottom (image) element, and have each thread gather and add up the top
// (column) entries that map back to it.
- col2im_gpu_kernel<Dtype><<<CAFFEINE_GET_BLOCKS(num_kernels), CAFFEINE_CUDA_NUM_THREADS>>>(
+ col2im_gpu_kernel<Dtype><<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_col, height, width, channels, ksize, stride,
height_col, width_col, data_im);
CUDA_POST_KERNEL_CHECK;
double* data_im);
-} // namespace caffeine
+} // namespace caffe
-#ifndef _CAFFEINE_UTIL__IM2COL_HPP_
-#define _CAFFEINE_UTIL_IM2COL_HPP_
+#ifndef CAFFE_UTIL_IM2COL_HPP_
+#define CAFFE_UTIL_IM2COL_HPP_
-namespace caffeine {
+namespace caffe {
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int psize, const int stride,
Dtype* data_im);
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_UTIL_IM2COL_HPP_
+#endif // CAFFE_UTIL_IM2COL_HPP_
#include <mkl.h>
#include <cublas_v2.h>
-#include "caffeine/common.hpp"
-#include "caffeine/util/math_functions.hpp"
+#include "caffe/common.hpp"
+#include "caffe/util/math_functions.hpp"
-namespace caffeine {
+namespace caffe {
template<>
-void caffeine_cpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
+void caffe_cpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const float alpha, const float* A, const float* B, const float beta,
float* C) {
}
template<>
-void caffeine_cpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
+void caffe_cpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const double alpha, const double* A, const double* B, const double beta,
double* C) {
}
template <>
-void caffeine_gpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
+void caffe_gpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const float alpha, const float* A, const float* B, const float beta,
float* C) {
(TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
(TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
- CUBLAS_CHECK(cublasSgemm(Caffeine::cublas_handle(), cuTransB, cuTransA,
+ CUBLAS_CHECK(cublasSgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
}
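// Note on the swapped operands above: cuBLAS assumes column-major storage,
// so the row-major M x N matrix C is, to cuBLAS, the N x M matrix C^T.
// Computing C^T = B^T * A^T (hence B before A, and N before M) yields the
// row-major C = A * B without any explicit transposition.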
template <>
-void caffeine_gpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
+void caffe_gpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const double alpha, const double* A, const double* B, const double beta,
double* C) {
(TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
(TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
- CUBLAS_CHECK(cublasDgemm(Caffeine::cublas_handle(), cuTransB, cuTransA,
+ CUBLAS_CHECK(cublasDgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
}
template <>
-void caffeine_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
+void caffe_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
const int N, const float alpha, const float* A, const float* x,
const float beta, float* y) {
cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
}
template <>
-void caffeine_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
+void caffe_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
const int N, const double alpha, const double* A, const double* x,
const double beta, double* y) {
cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
}
template <>
-void caffeine_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
+void caffe_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
const int N, const float alpha, const float* A, const float* x,
const float beta, float* y) {
cublasOperation_t cuTransA =
(TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
- CUBLAS_CHECK(cublasSgemv(Caffeine::cublas_handle(), cuTransA, N, M, &alpha,
+ CUBLAS_CHECK(cublasSgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
A, N, x, 1, &beta, y, 1));
}
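// Note on the flipped flag above: the row-major M x N matrix A is, to
// column-major cuBLAS, the N x M matrix A^T, so an untransposed C-convention
// gemv must ask cuBLAS for CUBLAS_OP_T (and vice versa), with the dimensions
// passed as N, M.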
template <>
-void caffeine_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
+void caffe_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
const int N, const double alpha, const double* A, const double* x,
const double beta, double* y) {
cublasOperation_t cuTransA =
(TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
- CUBLAS_CHECK(cublasDgemv(Caffeine::cublas_handle(), cuTransA, N, M, &alpha,
+ CUBLAS_CHECK(cublasDgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
A, N, x, 1, &beta, y, 1));
}
template <>
-void caffeine_axpy<float>(const int N, const float alpha, const float* X,
+void caffe_axpy<float>(const int N, const float alpha, const float* X,
float* Y) { cblas_saxpy(N, alpha, X, 1, Y, 1); }
template <>
-void caffeine_axpy<double>(const int N, const double alpha, const double* X,
+void caffe_axpy<double>(const int N, const double alpha, const double* X,
double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); }
template <>
-void caffeine_copy<float>(const int N, const float* X, float* Y) {
+void caffe_copy<float>(const int N, const float* X, float* Y) {
cblas_scopy(N, X, 1, Y, 1);
}
template <>
-void caffeine_copy<double>(const int N, const double* X, double* Y) {
+void caffe_copy<double>(const int N, const double* X, double* Y) {
cblas_dcopy(N, X, 1, Y, 1);
}
template <>
-void caffeine_sqr<float>(const int n, const float* a, float* y){
+void caffe_sqr<float>(const int n, const float* a, float* y) {
vsSqr(n, a, y);
}
template <>
-void caffeine_sqr<double>(const int n, const double* a, double* y) {
+void caffe_sqr<double>(const int n, const double* a, double* y) {
vdSqr(n, a, y);
}
template <>
-void caffeine_mul<float>(const int n, const float* a, const float* b,
+void caffe_mul<float>(const int n, const float* a, const float* b,
float* y) { vsMul(n, a, b, y); }
template <>
-void caffeine_mul<double>(const int n, const double* a, const double* b,
+void caffe_mul<double>(const int n, const double* a, const double* b,
double* y) { vdMul(n, a, b, y); }
template <>
-void caffeine_div<float>(const int n, const float* a, const float* b,
+void caffe_div<float>(const int n, const float* a, const float* b,
float* y) { vsDiv(n, a, b, y); }
template <>
-void caffeine_div<double>(const int n, const double* a, const double* b,
+void caffe_div<double>(const int n, const double* a, const double* b,
double* y) { vdDiv(n, a, b, y); }
template <>
-void caffeine_powx<float>(const int n, const float* a, const float b,
+void caffe_powx<float>(const int n, const float* a, const float b,
float* y) { vsPowx(n, a, b, y); }
template <>
-void caffeine_powx<double>(const int n, const double* a, const double b,
+void caffe_powx<double>(const int n, const double* a, const double b,
double* y) { vdPowx(n, a, b, y); }
-} // namespace caffeine
+} // namespace caffe
-#ifndef CAFFEINE_UTIL_MATH_FUNCTIONS_H_
-#define CAFFEINE_UTIL_MATH_FUNCTIONS_H_
+#ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_
+#define CAFFE_UTIL_MATH_FUNCTIONS_H_
#include <mkl.h>
#include <cublas_v2.h>
-namespace caffeine {
+namespace caffe {
// Decaf gemm provides a simpler interface to the gemm functions, with the
// limitation that the data has to be contiguous in memory.
template <typename Dtype>
-void caffeine_cpu_gemm(const CBLAS_TRANSPOSE TransA,
+void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
Dtype* C);
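// A minimal usage sketch (illustrative; the buffers are hypothetical and
// must be contiguous, row-major, and already allocated):
//   float A[6] = {1, 2, 3, 4, 5, 6};                        // 2 x 3
//   float B[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};  // 3 x 4
//   float C[8];                                             // 2 x 4 result
//   caffe_cpu_gemm<float>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.f,
//                         A, B, 0.f, C);  // C = 1 * A * B + 0 * C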
// gemm function - following the C convention and calling the Fortran-order
// GPU code under the hood.
template <typename Dtype>
-void caffeine_gpu_gemm(const CBLAS_TRANSPOSE TransA,
+void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
Dtype* C);
template <typename Dtype>
-void caffeine_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
Dtype* y);
template <typename Dtype>
-void caffeine_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
Dtype* y);
template <typename Dtype>
-void caffeine_axpy(const int N, const Dtype alpha, const Dtype* X,
+void caffe_axpy(const int N, const Dtype alpha, const Dtype* X,
Dtype* Y);
template <typename Dtype>
-void caffeine_copy(const int N, const Dtype *X, Dtype *Y);
+void caffe_copy(const int N, const Dtype *X, Dtype *Y);
template <typename Dtype>
-void caffeine_sqr(const int N, const Dtype* a, Dtype* y);
+void caffe_sqr(const int N, const Dtype* a, Dtype* y);
template <typename Dtype>
-void caffeine_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
-void caffeine_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
template <typename Dtype>
-void caffeine_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
+void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_UTIL_MATH_FUNCTIONS_H_
+#endif // CAFFE_UTIL_MATH_FUNCTIONS_H_
-#ifndef CAFFEINE_VISION_LAYERS_HPP_
-#define CAFFEINE_VISION_LAYERS_HPP_
+#ifndef CAFFE_VISION_LAYERS_HPP_
+#define CAFFE_VISION_LAYERS_HPP_
-#include "caffeine/layer.hpp"
+#include "caffe/layer.hpp"
-namespace caffeine {
+namespace caffe {
// The neuron layer is a specific type of layer that just works on single
// elements.
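// (For instance, an element-wise activation maps each bottom value to one
// top value, so the top blob keeps exactly the bottom blob's shape.)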
int N_;
};
-} // namespace caffeine
+} // namespace caffe
-#endif // CAFFEINE_VISION_LAYERS_HPP_
+#endif // CAFFE_VISION_LAYERS_HPP_
+++ /dev/null
-#include <cstdlib>
-#include <cstdio>
-#include <iostream>
-
-#include <cuda_runtime.h>
-#include <glog/logging.h>
-#include <gtest/gtest.h>
-#include "caffeine/test/test_caffeine_main.hpp"
-
-namespace caffeine {
-
-extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
-
-class PlatformTest : public ::testing::Test {};
-
-TEST_F(PlatformTest, TestInitialization) {
- printf("Major revision number: %d\n", CAFFEINE_TEST_CUDA_PROP.major);
- printf("Minor revision number: %d\n", CAFFEINE_TEST_CUDA_PROP.minor);
- printf("Name: %s\n", CAFFEINE_TEST_CUDA_PROP.name);
- printf("Total global memory: %lu\n", CAFFEINE_TEST_CUDA_PROP.totalGlobalMem);
- printf("Total shared memory per block: %lu\n", CAFFEINE_TEST_CUDA_PROP.sharedMemPerBlock);
- printf("Total registers per block: %d\n", CAFFEINE_TEST_CUDA_PROP.regsPerBlock);
- printf("Warp size: %d\n", CAFFEINE_TEST_CUDA_PROP.warpSize);
- printf("Maximum memory pitch: %lu\n", CAFFEINE_TEST_CUDA_PROP.memPitch);
- printf("Maximum threads per block: %d\n", CAFFEINE_TEST_CUDA_PROP.maxThreadsPerBlock);
- for (int i = 0; i < 3; ++i)
- printf("Maximum dimension %d of block: %d\n", i, CAFFEINE_TEST_CUDA_PROP.maxThreadsDim[i]);
- for (int i = 0; i < 3; ++i)
- printf("Maximum dimension %d of grid: %d\n", i, CAFFEINE_TEST_CUDA_PROP.maxGridSize[i]);
- printf("Clock rate: %d\n", CAFFEINE_TEST_CUDA_PROP.clockRate);
- printf("Total constant memory: %lu\n", CAFFEINE_TEST_CUDA_PROP.totalConstMem);
- printf("Texture alignment: %lu\n", CAFFEINE_TEST_CUDA_PROP.textureAlignment);
- printf("Concurrent copy and execution: %s\n", (CAFFEINE_TEST_CUDA_PROP.deviceOverlap ? "Yes" : "No"));
- printf("Number of multiprocessors: %d\n", CAFFEINE_TEST_CUDA_PROP.multiProcessorCount);
- printf("Kernel execution timeout: %s\n", (CAFFEINE_TEST_CUDA_PROP.kernelExecTimeoutEnabled ? "Yes" : "No"));
- EXPECT_TRUE(true);
-}
-
-} // namespace caffeine