From b17ac6620b4e6ae33d4d889b6cdbde1c447bb944 Mon Sep 17 00:00:00 2001 From: Eric Tzeng Date: Wed, 26 Feb 2014 18:45:45 -0800 Subject: [PATCH] Splitting source files between CUDA and CPU code. --- src/caffe/layers/bnll_layer.cpp | 48 ++++++++++ src/caffe/layers/bnll_layer.cu | 31 ------ src/caffe/layers/conv_layer.cpp | 88 ----------------- src/caffe/layers/conv_layer.cu | 104 +++++++++++++++++++++ src/caffe/layers/data_layer.cpp | 23 ----- src/caffe/layers/data_layer.cu | 44 +++++++++ src/caffe/layers/dropout_layer.cpp | 63 +++++++++++++ src/caffe/layers/dropout_layer.cu | 48 ---------- src/caffe/layers/flatten_layer.cpp | 18 ---- src/caffe/layers/flatten_layer.cu | 30 ++++++ src/caffe/layers/hdf5_data_layer.cpp | 28 ------ src/caffe/layers/hdf5_data_layer.cu | 53 +++++++++++ src/caffe/layers/im2col_layer.cpp | 24 ----- src/caffe/layers/im2col_layer.cu | 38 ++++++++ src/caffe/layers/inner_product_layer.cpp | 40 -------- src/caffe/layers/inner_product_layer.cu | 59 ++++++++++++ src/caffe/layers/{loss_layer.cu => loss_layer.cpp} | 6 +- src/caffe/layers/relu_layer.cpp | 42 +++++++++ src/caffe/layers/relu_layer.cu | 27 ------ src/caffe/layers/sigmoid_layer.cpp | 46 +++++++++ src/caffe/layers/sigmoid_layer.cu | 35 ------- src/caffe/layers/softmax_layer.cpp | 86 +++++++++++++++++ src/caffe/layers/softmax_layer.cu | 71 -------------- src/caffe/layers/softmax_loss_layer.cpp | 59 ++++++++++++ src/caffe/layers/softmax_loss_layer.cu | 40 -------- src/caffe/layers/split_layer.cpp | 34 ------- src/caffe/layers/split_layer.cu | 48 ++++++++++ src/caffe/layers/tanh_layer.cpp | 48 ++++++++++ src/caffe/layers/tanh_layer.cu | 33 ------- 29 files changed, 771 insertions(+), 543 deletions(-) create mode 100644 src/caffe/layers/bnll_layer.cpp create mode 100644 src/caffe/layers/conv_layer.cu create mode 100644 src/caffe/layers/data_layer.cu create mode 100644 src/caffe/layers/dropout_layer.cpp create mode 100644 src/caffe/layers/flatten_layer.cu create mode 100644 src/caffe/layers/hdf5_data_layer.cu create mode 100644 src/caffe/layers/im2col_layer.cu create mode 100644 src/caffe/layers/inner_product_layer.cu rename src/caffe/layers/{loss_layer.cu => loss_layer.cpp} (96%) create mode 100644 src/caffe/layers/relu_layer.cpp create mode 100644 src/caffe/layers/sigmoid_layer.cpp create mode 100644 src/caffe/layers/softmax_layer.cpp create mode 100644 src/caffe/layers/softmax_loss_layer.cpp create mode 100644 src/caffe/layers/split_layer.cu create mode 100644 src/caffe/layers/tanh_layer.cpp diff --git a/src/caffe/layers/bnll_layer.cpp b/src/caffe/layers/bnll_layer.cpp new file mode 100644 index 0000000..ab0e0f0 --- /dev/null +++ b/src/caffe/layers/bnll_layer.cpp @@ -0,0 +1,48 @@ +// Copyright 2013 Yangqing Jia + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include + +using std::min; + +namespace caffe { + +const float kBNLL_THRESHOLD = 50.; + +template +void BNLLLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = (*top)[0]->mutable_cpu_data(); + const int count = bottom[0]->count(); + for (int i = 0; i < count; ++i) { + top_data[i] = bottom_data[i] > 0 ? + bottom_data[i] + log(1. + exp(-bottom_data[i])) : + log(1. 
+ exp(bottom_data[i])); + } +} + +template +Dtype BNLLLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + if (propagate_down) { + const Dtype* bottom_data = (*bottom)[0]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); + const int count = (*bottom)[0]->count(); + Dtype expval; + for (int i = 0; i < count; ++i) { + expval = exp(min(bottom_data[i], Dtype(kBNLL_THRESHOLD))); + bottom_diff[i] = top_diff[i] * expval / (expval + 1.); + } + } + return Dtype(0); +} + + +INSTANTIATE_CLASS(BNLLLayer); + + +} // namespace caffe diff --git a/src/caffe/layers/bnll_layer.cu b/src/caffe/layers/bnll_layer.cu index f61cffa..1edec33 100644 --- a/src/caffe/layers/bnll_layer.cu +++ b/src/caffe/layers/bnll_layer.cu @@ -13,37 +13,6 @@ namespace caffe { const float kBNLL_THRESHOLD = 50.; template -void BNLLLayer::Forward_cpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - top_data[i] = bottom_data[i] > 0 ? - bottom_data[i] + log(1. + exp(-bottom_data[i])) : - log(1. + exp(bottom_data[i])); - } -} - -template -Dtype BNLLLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - if (propagate_down) { - const Dtype* bottom_data = (*bottom)[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - const int count = (*bottom)[0]->count(); - Dtype expval; - for (int i = 0; i < count; ++i) { - expval = exp(min(bottom_data[i], Dtype(kBNLL_THRESHOLD))); - bottom_diff[i] = top_diff[i] * expval / (expval + 1.); - } - } - return Dtype(0); -} - -template __global__ void BNLLForward(const int n, const Dtype* in, Dtype* out) { int index = threadIdx.x + blockIdx.x * blockDim.x; if (index < n) { diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 21256f9..64a652a 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -107,36 +107,6 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, } template -void ConvolutionLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = (*top)[0]->mutable_gpu_data(); - Dtype* col_data = col_buffer_.mutable_gpu_data(); - const Dtype* weight = this->blobs_[0]->gpu_data(); - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - for (int n = 0; n < NUM_; ++n) { - // First, im2col - im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); - // Second, innerproduct with groups - for (int g = 0; g < GROUP_; ++g) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, - (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, - (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); - } - // third, add bias - if (biasterm_) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_, - N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), - reinterpret_cast(bias_multiplier_->gpu_data()), - (Dtype)1., top_data + (*top)[0]->offset(n)); - } - } -} - -template Dtype ConvolutionLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); @@ -192,64 +162,6 @@ Dtype ConvolutionLayer::Backward_cpu(const vector*>& top, return 
Dtype(0.); } -template -Dtype ConvolutionLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - const Dtype* top_diff = top[0]->gpu_diff(); - const Dtype* weight = this->blobs_[0]->gpu_data(); - Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); - const Dtype* bottom_data = (*bottom)[0]->gpu_data(); - Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); - Dtype* col_data = col_buffer_.mutable_gpu_data(); - Dtype* col_diff = col_buffer_.mutable_gpu_diff(); - // bias gradient if necessary - Dtype* bias_diff = NULL; - - if (biasterm_) { - bias_diff = this->blobs_[1]->mutable_gpu_diff(); - CUDA_CHECK(cudaMemset(bias_diff, 0, - sizeof(Dtype) * this->blobs_[1]->count())); - for (int n = 0; n < NUM_; ++n) { - caffe_gpu_gemv(CblasNoTrans, NUM_OUTPUT_, N_, - 1., top_diff + top[0]->offset(n), - reinterpret_cast(bias_multiplier_->gpu_data()), - 1., bias_diff); - } - } - - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - CUDA_CHECK(cudaMemset(weight_diff, 0, - sizeof(Dtype) * this->blobs_[0]->count())); - for (int n = 0; n < NUM_; ++n) { - // since we saved memory in the forward pass by not storing all col data, - // we will need to recompute them. - im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); - // gradient w.r.t. weight. Note that we will accumulate diffs. - for (int g = 0; g < GROUP_; ++g) { - caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, - (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, - col_data + col_offset * g, (Dtype)1., - weight_diff + weight_offset * g); - } - // gradient w.r.t. bottom data, if necessary - if (propagate_down) { - for (int g = 0; g < GROUP_; ++g) { - caffe_gpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, - (Dtype)1., weight + weight_offset * g, - top_diff + top[0]->offset(n) + top_offset * g, - (Dtype)0., col_diff + col_offset * g); - } - // col2im back to the data - col2im_gpu(col_diff, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, - bottom_diff + (*bottom)[0]->offset(n)); - } - } - return Dtype(0.); -} - INSTANTIATE_CLASS(ConvolutionLayer); } // namespace caffe diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu new file mode 100644 index 0000000..a7f56fa --- /dev/null +++ b/src/caffe/layers/conv_layer.cu @@ -0,0 +1,104 @@ +// Copyright 2013 Yangqing Jia + +#include + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/filler.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void ConvolutionLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = (*top)[0]->mutable_gpu_data(); + Dtype* col_data = col_buffer_.mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + int weight_offset = M_ * K_; + int col_offset = K_ * N_; + int top_offset = M_ * N_; + for (int n = 0; n < NUM_; ++n) { + // First, im2col + im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, + WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); + // Second, innerproduct with groups + for (int g = 0; g < GROUP_; ++g) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, + (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, + (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); + } + // third, add bias + if (biasterm_) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_, + N_, 1, (Dtype)1., 
this->blobs_[1]->gpu_data(), + reinterpret_cast(bias_multiplier_->gpu_data()), + (Dtype)1., top_data + (*top)[0]->offset(n)); + } + } +} + +template +Dtype ConvolutionLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); + const Dtype* bottom_data = (*bottom)[0]->gpu_data(); + Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); + Dtype* col_data = col_buffer_.mutable_gpu_data(); + Dtype* col_diff = col_buffer_.mutable_gpu_diff(); + // bias gradient if necessary + Dtype* bias_diff = NULL; + + if (biasterm_) { + bias_diff = this->blobs_[1]->mutable_gpu_diff(); + CUDA_CHECK(cudaMemset(bias_diff, 0, + sizeof(Dtype) * this->blobs_[1]->count())); + for (int n = 0; n < NUM_; ++n) { + caffe_gpu_gemv(CblasNoTrans, NUM_OUTPUT_, N_, + 1., top_diff + top[0]->offset(n), + reinterpret_cast(bias_multiplier_->gpu_data()), + 1., bias_diff); + } + } + + int weight_offset = M_ * K_; + int col_offset = K_ * N_; + int top_offset = M_ * N_; + CUDA_CHECK(cudaMemset(weight_diff, 0, + sizeof(Dtype) * this->blobs_[0]->count())); + for (int n = 0; n < NUM_; ++n) { + // since we saved memory in the forward pass by not storing all col data, + // we will need to recompute them. + im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_, + WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); + // gradient w.r.t. weight. Note that we will accumulate diffs. + for (int g = 0; g < GROUP_; ++g) { + caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, + (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, + col_data + col_offset * g, (Dtype)1., + weight_diff + weight_offset * g); + } + // gradient w.r.t. bottom data, if necessary + if (propagate_down) { + for (int g = 0; g < GROUP_; ++g) { + caffe_gpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, + (Dtype)1., weight + weight_offset * g, + top_diff + top[0]->offset(n) + top_offset * g, + (Dtype)0., col_diff + col_offset * g); + } + // col2im back to the data + col2im_gpu(col_diff, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, + bottom_diff + (*bottom)[0]->offset(n)); + } + } + return Dtype(0.); +} + + +INSTANTIATE_CLASS(ConvolutionLayer); + +} // namespace caffe diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index f973a56..cc03cdb 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -227,23 +227,6 @@ void DataLayer::Forward_cpu(const vector*>& bottom, reinterpret_cast(this))) << "Pthread execution failed."; } -template -void DataLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - // First, join the thread - CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; - // Copy the data - CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(), - prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(), - cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(), - prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(), - cudaMemcpyHostToDevice)); - // Start a new prefetch thread - CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch, - reinterpret_cast(this))) << "Pthread execution failed."; -} - // The backward operations are dummy - they do not carry any computation. 
template Dtype DataLayer::Backward_cpu(const vector*>& top, @@ -251,12 +234,6 @@ Dtype DataLayer::Backward_cpu(const vector*>& top, return Dtype(0.); } -template -Dtype DataLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - return Dtype(0.); -} - INSTANTIATE_CLASS(DataLayer); } // namespace caffe diff --git a/src/caffe/layers/data_layer.cu b/src/caffe/layers/data_layer.cu new file mode 100644 index 0000000..946f30f --- /dev/null +++ b/src/caffe/layers/data_layer.cu @@ -0,0 +1,44 @@ +// Copyright 2013 Yangqing Jia + +#include +#include +#include + +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/util/io.hpp" +#include "caffe/vision_layers.hpp" + +using std::string; + +namespace caffe { + +template +void DataLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + // First, join the thread + CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; + // Copy the data + CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(), + prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(), + cudaMemcpyHostToDevice)); + CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(), + prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(), + cudaMemcpyHostToDevice)); + // Start a new prefetch thread + CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch, + reinterpret_cast(this))) << "Pthread execution failed."; +} + +// The backward operations are dummy - they do not carry any computation. +template +Dtype DataLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + return Dtype(0.); +} + +INSTANTIATE_CLASS(DataLayer); + +} // namespace caffe diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp new file mode 100644 index 0000000..4e1fbfa --- /dev/null +++ b/src/caffe/layers/dropout_layer.cpp @@ -0,0 +1,63 @@ +// Copyright 2013 Yangqing Jia + +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/syncedmem.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void DropoutLayer::SetUp(const vector*>& bottom, + vector*>* top) { + NeuronLayer::SetUp(bottom, top); + // Set up the cache for random number generation + rand_vec_.reset(new SyncedMemory(bottom[0]->count() * sizeof(int))); + threshold_ = this->layer_param_.dropout_ratio(); + DCHECK(threshold_ > 0.); + DCHECK(threshold_ < 1.); + scale_ = 1. / (1. - threshold_); + uint_thres_ = (unsigned int)(UINT_MAX * threshold_); +} + +template +void DropoutLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = (*top)[0]->mutable_cpu_data(); + int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); + const int count = bottom[0]->count(); + if (Caffe::phase() == Caffe::TRAIN) { + // Create random numbers + viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(), + count, mask, 1. 
- threshold_); + for (int i = 0; i < count; ++i) { + top_data[i] = bottom_data[i] * mask[i] * scale_; + } + } else { + memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype)); + } +} + +template +Dtype DropoutLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + CHECK(Caffe::phase() == Caffe::TRAIN); + if (propagate_down) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); + const int* mask = reinterpret_cast(rand_vec_->cpu_data()); + const int count = (*bottom)[0]->count(); + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i] * mask[i] * scale_; + } + } + return Dtype(0); +} + + +INSTANTIATE_CLASS(DropoutLayer); + + +} // namespace caffe diff --git a/src/caffe/layers/dropout_layer.cu b/src/caffe/layers/dropout_layer.cu index efba295..2b6a68b 100644 --- a/src/caffe/layers/dropout_layer.cu +++ b/src/caffe/layers/dropout_layer.cu @@ -13,54 +13,6 @@ using std::max; namespace caffe { -template -void DropoutLayer::SetUp(const vector*>& bottom, - vector*>* top) { - NeuronLayer::SetUp(bottom, top); - // Set up the cache for random number generation - rand_vec_.reset(new SyncedMemory(bottom[0]->count() * sizeof(int))); - threshold_ = this->layer_param_.dropout_ratio(); - DCHECK(threshold_ > 0.); - DCHECK(threshold_ < 1.); - scale_ = 1. / (1. - threshold_); - uint_thres_ = (unsigned int)(UINT_MAX * threshold_); -} - -template -void DropoutLayer::Forward_cpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); - const int count = bottom[0]->count(); - if (Caffe::phase() == Caffe::TRAIN) { - // Create random numbers - viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(), - count, mask, 1. 
- threshold_); - for (int i = 0; i < count; ++i) { - top_data[i] = bottom_data[i] * mask[i] * scale_; - } - } else { - memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype)); - } -} - -template -Dtype DropoutLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - CHECK(Caffe::phase() == Caffe::TRAIN); - if (propagate_down) { - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - const int* mask = reinterpret_cast(rand_vec_->cpu_data()); - const int count = (*bottom)[0]->count(); - for (int i = 0; i < count; ++i) { - bottom_diff[i] = top_diff[i] * mask[i] * scale_; - } - } - return Dtype(0); -} template __global__ void DropoutForward(const int n, const Dtype* in, diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index bedf296..9e17a82 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -30,14 +30,6 @@ void FlattenLayer::Forward_cpu(const vector*>& bottom, } template -void FlattenLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = (*top)[0]->mutable_gpu_data(); - caffe_gpu_copy(count_, bottom_data, top_data); -} - -template Dtype FlattenLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); @@ -46,16 +38,6 @@ Dtype FlattenLayer::Backward_cpu(const vector*>& top, return Dtype(0.); } - -template -Dtype FlattenLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - const Dtype* top_diff = top[0]->gpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); - caffe_gpu_copy(count_, top_diff, bottom_diff); - return Dtype(0.); -} - INSTANTIATE_CLASS(FlattenLayer); } // namespace caffe diff --git a/src/caffe/layers/flatten_layer.cu b/src/caffe/layers/flatten_layer.cu new file mode 100644 index 0000000..571e22e --- /dev/null +++ b/src/caffe/layers/flatten_layer.cu @@ -0,0 +1,30 @@ +// Copyright 2013 Yangqing Jia + +#include + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void FlattenLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = (*top)[0]->mutable_gpu_data(); + caffe_gpu_copy(count_, bottom_data, top_data); +} + +template +Dtype FlattenLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); + caffe_gpu_copy(count_, top_diff, bottom_diff); + return Dtype(0.); +} + +INSTANTIATE_CLASS(FlattenLayer); + +} // namespace caffe diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 11b7d29..c31213e 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -65,28 +65,6 @@ void HDF5DataLayer::Forward_cpu(const vector*>& bottom, } } -template -void HDF5DataLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - const int batchsize = this->layer_param_.batchsize(); - for (int i = 0; i < batchsize; ++i, ++current_row) { - if (current_row == data_dims[0]) { - current_row = 0; - } - - CUDA_CHECK(cudaMemcpy( - &(*top)[0]->mutable_gpu_data()[i * data_dims[1]], - &(data.get()[current_row * data_dims[1]]), - sizeof(Dtype) * data_dims[1], - 
cudaMemcpyHostToDevice)); - - CUDA_CHECK(cudaMemcpy( - &(*top)[1]->mutable_gpu_data()[i * label_dims[1]], - &(label.get()[current_row * label_dims[1]]), - sizeof(Dtype) * label_dims[1], - cudaMemcpyHostToDevice)); - } -} // The backward operations are dummy - they do not carry any computation. template @@ -95,12 +73,6 @@ Dtype HDF5DataLayer::Backward_cpu(const vector*>& top, return Dtype(0.); } -template -Dtype HDF5DataLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - return Dtype(0.); -} - INSTANTIATE_CLASS(HDF5DataLayer); } // namespace caffe diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu new file mode 100644 index 0000000..1ecf149 --- /dev/null +++ b/src/caffe/layers/hdf5_data_layer.cu @@ -0,0 +1,53 @@ +// Copyright Sergey Karayev 2014 +/* +TODO: +- only load parts of the file, in accordance with a prototxt param "max_mem" +*/ + +#include +#include +#include + +#include "hdf5.h" +#include "hdf5_hl.h" + +#include "caffe/layer.hpp" +#include "caffe/util/io.hpp" +#include "caffe/vision_layers.hpp" + +using std::string; + +namespace caffe { + +template +void HDF5DataLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + const int batchsize = this->layer_param_.batchsize(); + for (int i = 0; i < batchsize; ++i, ++current_row) { + if (current_row == data_dims[0]) { + current_row = 0; + } + + CUDA_CHECK(cudaMemcpy( + &(*top)[0]->mutable_gpu_data()[i * data_dims[1]], + &(data.get()[current_row * data_dims[1]]), + sizeof(Dtype) * data_dims[1], + cudaMemcpyHostToDevice)); + + CUDA_CHECK(cudaMemcpy( + &(*top)[1]->mutable_gpu_data()[i * label_dims[1]], + &(label.get()[current_row * label_dims[1]]), + sizeof(Dtype) * label_dims[1], + cudaMemcpyHostToDevice)); + } +} + +template +Dtype HDF5DataLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + return Dtype(0.); +} + +INSTANTIATE_CLASS(HDF5DataLayer); + +} // namespace caffe diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index a94209b..e711713 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -37,17 +37,6 @@ void Im2colLayer::Forward_cpu(const vector*>& bottom, } template -void Im2colLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = (*top)[0]->mutable_gpu_data(); - for (int n = 0; n < bottom[0]->num(); ++n) { - im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n)); - } -} - -template Dtype Im2colLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); @@ -59,19 +48,6 @@ Dtype Im2colLayer::Backward_cpu(const vector*>& top, return Dtype(0.); } - -template -Dtype Im2colLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - const Dtype* top_diff = top[0]->gpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); - for (int n = 0; n < top[0]->num(); ++n) { - col2im_gpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n)); - } - return Dtype(0.); -} - INSTANTIATE_CLASS(Im2colLayer); } // namespace caffe diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu new file mode 100644 index 0000000..2d949b1 --- /dev/null +++ b/src/caffe/layers/im2col_layer.cu @@ -0,0 +1,38 @@ +// 
Copyright 2013 Yangqing Jia + +#include + +#include "caffe/layer.hpp" +#include "caffe/util/im2col.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/common.hpp" + +namespace caffe { + +template +void Im2colLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = (*top)[0]->mutable_gpu_data(); + for (int n = 0; n < bottom[0]->num(); ++n) { + im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, + WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n)); + } +} + +template +Dtype Im2colLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); + for (int n = 0; n < top[0]->num(); ++n) { + col2im_gpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, + WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n)); + } + return Dtype(0.); +} + + +INSTANTIATE_CLASS(Im2colLayer); + +} // namespace caffe diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index d770e23a..6987a78 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -2,7 +2,6 @@ #include -#include #include @@ -100,45 +99,6 @@ Dtype InnerProductLayer::Backward_cpu(const vector*>& top, return Dtype(0); } -template -void InnerProductLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = (*top)[0]->mutable_gpu_data(); - const Dtype* weight = this->blobs_[0]->gpu_data(); - caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1., - bottom_data, weight, (Dtype)0., top_data); - if (biasterm_) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., - reinterpret_cast(bias_multiplier_->gpu_data()), - this->blobs_[1]->gpu_data(), (Dtype)1., top_data); - } -} - -template -Dtype InnerProductLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - const Dtype* top_diff = top[0]->gpu_diff(); - const Dtype* bottom_data = (*bottom)[0]->gpu_data(); - // Gradient with respect to weight - caffe_gpu_gemm(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., - top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff()); - if (biasterm_) { - // Gradient with respect to bias - caffe_gpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, - reinterpret_cast(bias_multiplier_->gpu_data()), - (Dtype)0., this->blobs_[1]->mutable_gpu_diff()); - } - if (propagate_down) { - // Gradient with respect to bottom data - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1., - top_diff, this->blobs_[0]->gpu_data(), (Dtype)0., - (*bottom)[0]->mutable_gpu_diff()); - } - return Dtype(0); -} - INSTANTIATE_CLASS(InnerProductLayer); } // namespace caffe diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu new file mode 100644 index 0000000..c7c3e2a --- /dev/null +++ b/src/caffe/layers/inner_product_layer.cu @@ -0,0 +1,59 @@ +// Copyright 2013 Yangqing Jia + + +#include +#include + +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void InnerProductLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* 
top_data = (*top)[0]->mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1., + bottom_data, weight, (Dtype)0., top_data); + if (biasterm_) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., + reinterpret_cast(bias_multiplier_->gpu_data()), + this->blobs_[1]->gpu_data(), (Dtype)1., top_data); + } +} + +template +Dtype InnerProductLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* bottom_data = (*bottom)[0]->gpu_data(); + // Gradient with respect to weight + caffe_gpu_gemm(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., + top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff()); + if (biasterm_) { + // Gradient with respect to bias + caffe_gpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, + reinterpret_cast(bias_multiplier_->gpu_data()), + (Dtype)0., this->blobs_[1]->mutable_gpu_diff()); + } + if (propagate_down) { + // Gradient with respect to bottom data + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1., + top_diff, this->blobs_[0]->gpu_data(), (Dtype)0., + (*bottom)[0]->mutable_gpu_diff()); + } + return Dtype(0); +} + +INSTANTIATE_CLASS(InnerProductLayer); + +} // namespace caffe diff --git a/src/caffe/layers/loss_layer.cu b/src/caffe/layers/loss_layer.cpp similarity index 96% rename from src/caffe/layers/loss_layer.cu rename to src/caffe/layers/loss_layer.cpp index 745bfa4..1c4303d 100644 --- a/src/caffe/layers/loss_layer.cu +++ b/src/caffe/layers/loss_layer.cpp @@ -42,7 +42,7 @@ Dtype MultinomialLogisticLossLayer::Backward_cpu( Dtype loss = 0; for (int i = 0; i < num; ++i) { int label = static_cast(bottom_label[i]); - Dtype prob = max(bottom_data[i * dim + label], kLOG_THRESHOLD); + Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD)); loss -= log(prob); bottom_diff[i * dim + label] = - 1. 
/ prob / num; } @@ -86,7 +86,7 @@ Dtype InfogainLossLayer::Backward_cpu(const vector*>& top, for (int i = 0; i < num; ++i) { int label = static_cast(bottom_label[i]); for (int j = 0; j < dim; ++j) { - Dtype prob = max(bottom_data[i * dim + j], kLOG_THRESHOLD); + Dtype prob = max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD)); loss -= infogain_mat[label * dim + j] * log(prob); bottom_diff[i * dim + j] = - infogain_mat[label * dim + j] / prob / num; } @@ -160,7 +160,7 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, ++accuracy; } Dtype prob = max(bottom_data[i * dim + static_cast(bottom_label[i])], - kLOG_THRESHOLD); + Dtype(kLOG_THRESHOLD)); logprob -= log(prob); } // LOG(INFO) << "Accuracy: " << accuracy; diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp new file mode 100644 index 0000000..478ed31 --- /dev/null +++ b/src/caffe/layers/relu_layer.cpp @@ -0,0 +1,42 @@ +// Copyright 2013 Yangqing Jia + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include + +using std::max; + +namespace caffe { + +template +void ReLULayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = (*top)[0]->mutable_cpu_data(); + const int count = bottom[0]->count(); + for (int i = 0; i < count; ++i) { + top_data[i] = max(bottom_data[i], Dtype(0)); + } +} + +template +Dtype ReLULayer::Backward_cpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + if (propagate_down) { + const Dtype* bottom_data = (*bottom)[0]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); + const int count = (*bottom)[0]->count(); + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i] * (bottom_data[i] > 0); + } + } + return Dtype(0); +} + + +INSTANTIATE_CLASS(ReLULayer); + + +} // namespace caffe diff --git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu index ed1aab4..e2e58d9 100644 --- a/src/caffe/layers/relu_layer.cu +++ b/src/caffe/layers/relu_layer.cu @@ -11,33 +11,6 @@ using std::max; namespace caffe { template -void ReLULayer::Forward_cpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - top_data[i] = max(bottom_data[i], Dtype(0)); - } -} - -template -Dtype ReLULayer::Backward_cpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - if (propagate_down) { - const Dtype* bottom_data = (*bottom)[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - const int count = (*bottom)[0]->count(); - for (int i = 0; i < count; ++i) { - bottom_diff[i] = top_diff[i] * (bottom_data[i] > 0); - } - } - return Dtype(0); -} - -template __global__ void ReLUForward(const int n, const Dtype* in, Dtype* out) { int index = threadIdx.x + blockIdx.x * blockDim.x; if (index < n) { diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp new file mode 100644 index 0000000..112771f --- /dev/null +++ b/src/caffe/layers/sigmoid_layer.cpp @@ -0,0 +1,46 @@ +// Copyright 2014 Tobias Domhan + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include +#include + +namespace caffe { + +template +inline Dtype sigmoid(Dtype x) { + return 1. / (1. 
+ exp(-x)); +} + +template +void SigmoidLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = (*top)[0]->mutable_cpu_data(); + const int count = bottom[0]->count(); + for (int i = 0; i < count; ++i) { + top_data[i] = sigmoid(bottom_data[i]); + } +} + +template +Dtype SigmoidLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + if (propagate_down) { + const Dtype* bottom_data = (*bottom)[0]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); + const int count = (*bottom)[0]->count(); + for (int i = 0; i < count; ++i) { + Dtype sigmoid_x = sigmoid(bottom_data[i]); + bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x); + } + } + return Dtype(0); +} + +INSTANTIATE_CLASS(SigmoidLayer); + + +} // namespace caffe diff --git a/src/caffe/layers/sigmoid_layer.cu b/src/caffe/layers/sigmoid_layer.cu index e50260d..785d144 100644 --- a/src/caffe/layers/sigmoid_layer.cu +++ b/src/caffe/layers/sigmoid_layer.cu @@ -12,45 +12,10 @@ using std::max; namespace caffe { template -inline Dtype sigmoid(Dtype x) { - return 1. / (1. + exp(-x)); -} - -template -void SigmoidLayer::Forward_cpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - top_data[i] = sigmoid(bottom_data[i]); - } -} - -template -Dtype SigmoidLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - if (propagate_down) { - const Dtype* bottom_data = (*bottom)[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - const int count = (*bottom)[0]->count(); - for (int i = 0; i < count; ++i) { - Dtype sigmoid_x = sigmoid(bottom_data[i]); - bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x); - } - } - return Dtype(0); -} - - -template __device__ inline Dtype sigmoid_gpu(Dtype x) { return 1. / (1. 
+ exp(-x)); } - template __global__ void SigmoidForward(const int n, const Dtype* in, Dtype* out) { int index = threadIdx.x + blockIdx.x * blockDim.x; diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp new file mode 100644 index 0000000..172094d --- /dev/null +++ b/src/caffe/layers/softmax_layer.cpp @@ -0,0 +1,86 @@ +// Copyright 2013 Yangqing Jia + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/math_functions.hpp" + +using std::max; + +namespace caffe { + +template +void SoftmaxLayer::SetUp(const vector*>& bottom, + vector*>* top) { + CHECK_EQ(bottom.size(), 1) << "Softmax Layer takes a single blob as input."; + CHECK_EQ(top->size(), 1) << "Softmax Layer takes a single blob as output."; + (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + sum_multiplier_.Reshape(1, bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); + for (int i = 0; i < sum_multiplier_.count(); ++i) { + multiplier_data[i] = 1.; + } + scale_.Reshape(bottom[0]->num(), 1, 1, 1); +} + +template +void SoftmaxLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = (*top)[0]->mutable_cpu_data(); + Dtype* scale_data = scale_.mutable_cpu_data(); + int num = bottom[0]->num(); + int dim = bottom[0]->count() / bottom[0]->num(); + memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count()); + // we need to subtract the max to avoid numerical issues, compute the exp, + // and then normalize. + for (int i = 0; i < num; ++i) { + scale_data[i] = bottom_data[i*dim]; + for (int j = 0; j < dim; ++j) { + scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]); + } + } + // subtraction + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., + scale_data, sum_multiplier_.cpu_data(), 1., top_data); + // Perform exponentiation + caffe_exp(num * dim, top_data, top_data); + // sum after exp + caffe_cpu_gemv(CblasNoTrans, num, dim, 1., top_data, + sum_multiplier_.cpu_data(), 0., scale_data); + // Do division + for (int i = 0; i < num; ++i) { + caffe_scal(dim, Dtype(1.) 
/ scale_data[i], top_data + i * dim); + } +} + +template +Dtype SoftmaxLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + const Dtype* top_diff = top[0]->cpu_diff(); + const Dtype* top_data = top[0]->cpu_data(); + Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); + Dtype* scale_data = scale_.mutable_cpu_data(); + int num = top[0]->num(); + int dim = top[0]->count() / top[0]->num(); + memcpy(bottom_diff, top_diff, sizeof(Dtype) * top[0]->count()); + // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff + for (int i = 0; i < num; ++i) { + scale_data[i] = caffe_cpu_dot(dim, top_diff + i * dim, + top_data + i * dim); + } + // subtraction + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., + scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff); + // elementwise multiplication + caffe_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); + return Dtype(0); +} + + +INSTANTIATE_CLASS(SoftmaxLayer); + + +} // namespace caffe diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu index af73260..fe2a89e 100644 --- a/src/caffe/layers/softmax_layer.cu +++ b/src/caffe/layers/softmax_layer.cu @@ -15,53 +15,6 @@ using std::max; namespace caffe { template -void SoftmaxLayer::SetUp(const vector*>& bottom, - vector*>* top) { - CHECK_EQ(bottom.size(), 1) << "Softmax Layer takes a single blob as input."; - CHECK_EQ(top->size(), 1) << "Softmax Layer takes a single blob as output."; - (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - sum_multiplier_.Reshape(1, bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); - for (int i = 0; i < sum_multiplier_.count(); ++i) { - multiplier_data[i] = 1.; - } - scale_.Reshape(bottom[0]->num(), 1, 1, 1); -} - -template -void SoftmaxLayer::Forward_cpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - Dtype* scale_data = scale_.mutable_cpu_data(); - int num = bottom[0]->num(); - int dim = bottom[0]->count() / bottom[0]->num(); - memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count()); - // we need to subtract the max to avoid numerical issues, compute the exp, - // and then normalize. - for (int i = 0; i < num; ++i) { - scale_data[i] = bottom_data[i*dim]; - for (int j = 0; j < dim; ++j) { - scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]); - } - } - // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., - scale_data, sum_multiplier_.cpu_data(), 1., top_data); - // Perform exponentiation - caffe_exp(num * dim, top_data, top_data); - // sum after exp - caffe_cpu_gemv(CblasNoTrans, num, dim, 1., top_data, - sum_multiplier_.cpu_data(), 0., scale_data); - // Do division - for (int i = 0; i < num; ++i) { - caffe_scal(dim, Dtype(1.) 
/ scale_data[i], top_data + i * dim); - } -} - -template __global__ void kernel_get_max(const int num, const int dim, const Dtype* data, Dtype* out) { int index = threadIdx.x + blockIdx.x * blockDim.x; @@ -125,30 +78,6 @@ void SoftmaxLayer::Forward_gpu(const vector*>& bottom, num, dim, scale_data, top_data); } -template -Dtype SoftmaxLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - const Dtype* top_diff = top[0]->cpu_diff(); - const Dtype* top_data = top[0]->cpu_data(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - Dtype* scale_data = scale_.mutable_cpu_data(); - int num = top[0]->num(); - int dim = top[0]->count() / top[0]->num(); - memcpy(bottom_diff, top_diff, sizeof(Dtype) * top[0]->count()); - // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff - for (int i = 0; i < num; ++i) { - scale_data[i] = caffe_cpu_dot(dim, top_diff + i * dim, - top_data + i * dim); - } - // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., - scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff); - // elementwise multiplication - caffe_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); - return Dtype(0); -} - // TODO(Yangqing): implement the GPU version of softmax. template Dtype SoftmaxLayer::Backward_gpu(const vector*>& top, diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp new file mode 100644 index 0000000..2ec7308 --- /dev/null +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -0,0 +1,59 @@ +// Copyright 2013 Yangqing Jia + +#include +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/math_functions.hpp" + +using std::max; + +namespace caffe { + +template +void SoftmaxWithLossLayer::SetUp(const vector*>& bottom, + vector*>* top) { + CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input."; + CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output."; + softmax_bottom_vec_.clear(); + softmax_bottom_vec_.push_back(bottom[0]); + softmax_top_vec_.push_back(&prob_); + softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_); +} + +template +void SoftmaxWithLossLayer::Forward_cpu( + const vector*>& bottom, vector*>* top) { + // The forward pass computes the softmax prob values. 
+ softmax_bottom_vec_[0] = bottom[0]; + softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_); +} + +template +Dtype SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + // First, compute the diff + Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); + const Dtype* prob_data = prob_.cpu_data(); + memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count()); + const Dtype* label = (*bottom)[1]->cpu_data(); + int num = prob_.num(); + int dim = prob_.count() / num; + Dtype loss = 0; + for (int i = 0; i < num; ++i) { + bottom_diff[i * dim + static_cast(label[i])] -= 1; + loss += -log(max(prob_data[i * dim + static_cast(label[i])], Dtype(FLT_MIN))); + } + // Scale down gradient + caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff); + return loss / num; +} + + +INSTANTIATE_CLASS(SoftmaxWithLossLayer); + + +} // namespace caffe diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 3e26586..100393c 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -13,25 +13,6 @@ using std::max; namespace caffe { template -void SoftmaxWithLossLayer::SetUp(const vector*>& bottom, - vector*>* top) { - CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input."; - CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output."; - softmax_bottom_vec_.clear(); - softmax_bottom_vec_.push_back(bottom[0]); - softmax_top_vec_.push_back(&prob_); - softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_); -} - -template -void SoftmaxWithLossLayer::Forward_cpu( - const vector*>& bottom, vector*>* top) { - // The forward pass computes the softmax prob values. - softmax_bottom_vec_[0] = bottom[0]; - softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_); -} - -template void SoftmaxWithLossLayer::Forward_gpu( const vector*>& bottom, vector*>* top) { // The forward pass computes the softmax prob values. @@ -40,27 +21,6 @@ void SoftmaxWithLossLayer::Forward_gpu( } template -Dtype SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - // First, compute the diff - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - const Dtype* prob_data = prob_.cpu_data(); - memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count()); - const Dtype* label = (*bottom)[1]->cpu_data(); - int num = prob_.num(); - int dim = prob_.count() / num; - Dtype loss = 0; - for (int i = 0; i < num; ++i) { - bottom_diff[i * dim + static_cast(label[i])] -= 1; - loss += -log(max(prob_data[i * dim + static_cast(label[i])], FLT_MIN)); - } - // Scale down gradient - caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff); - return loss / num; -} - -template Dtype SoftmaxWithLossLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { // TODO(Yangqing): implement the GPU version of softmax. 
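For reference, Backward_cpu in the new softmax_loss_layer.cpp implements the usual softmax-plus-negative-log-likelihood gradient: with the probabilities p cached in prob_, the diff for sample i and class j is bottom_diff[i * dim + j] = (p_ij - 1{j == label_i}) / num, and the returned loss is the batch mean of -log(max(p_label, FLT_MIN)). The Dtype(FLT_MIN) cast added relative to the old .cu version is presumably there so that std::max can deduce a single argument type when Dtype is double, since FLT_MIN itself expands to a float constant.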
diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp index 56e9561..f9fc461 100644 --- a/src/caffe/layers/split_layer.cpp +++ b/src/caffe/layers/split_layer.cpp @@ -41,19 +41,6 @@ void SplitLayer::Forward_cpu(const vector*>& bottom, } template -void SplitLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - for (int i = 0; i < top->size(); ++i) { - if (i == 0 && (*top)[i] == bottom[0]) { - continue; - } - Dtype* top_data = (*top)[i]->mutable_gpu_data(); - caffe_gpu_copy(count_, bottom_data, top_data); - } -} - -template Dtype SplitLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -75,27 +62,6 @@ Dtype SplitLayer::Backward_cpu(const vector*>& top, } -template -Dtype SplitLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - if (propagate_down) { - const Dtype* top_diff = top[0]->gpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); - // Initialize by copying first top blob diff to our diff, unless we're - // doing in-place computation for the first blob, in which case the diff is - // already initialized. - if (top[0] != (*bottom)[0]) { - caffe_gpu_copy(count_, top_diff, bottom_diff); - } - // Add remaining top blob diffs. - for (int i = 1; i < top.size(); ++i) { - top_diff = top[i]->gpu_diff(); - caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff); - } - } - return Dtype(0.); -} - INSTANTIATE_CLASS(SplitLayer); } // namespace caffe diff --git a/src/caffe/layers/split_layer.cu b/src/caffe/layers/split_layer.cu new file mode 100644 index 0000000..5f25a46 --- /dev/null +++ b/src/caffe/layers/split_layer.cu @@ -0,0 +1,48 @@ +// Copyright 2014 Jeff Donahue + +#include + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void SplitLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + for (int i = 0; i < top->size(); ++i) { + if (i == 0 && (*top)[i] == bottom[0]) { + continue; + } + Dtype* top_data = (*top)[i]->mutable_gpu_data(); + caffe_gpu_copy(count_, bottom_data, top_data); + } +} + +template +Dtype SplitLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + if (propagate_down) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); + // Initialize by copying first top blob diff to our diff, unless we're + // doing in-place computation for the first blob, in which case the diff is + // already initialized. + if (top[0] != (*bottom)[0]) { + caffe_gpu_copy(count_, top_diff, bottom_diff); + } + // Add remaining top blob diffs. + for (int i = 1; i < top.size(); ++i) { + top_diff = top[i]->gpu_diff(); + caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff); + } + } + return Dtype(0.); +} + + +INSTANTIATE_CLASS(SplitLayer); + +} // namespace caffe diff --git a/src/caffe/layers/tanh_layer.cpp b/src/caffe/layers/tanh_layer.cpp new file mode 100644 index 0000000..d6f9956 --- /dev/null +++ b/src/caffe/layers/tanh_layer.cpp @@ -0,0 +1,48 @@ +// Copyright 2014 Aravindh Mahendran +// TanH neuron activation function layer. 
+// Adapted from ReLU layer code written by Yangqing Jia + +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { + +template +void TanHLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = (*top)[0]->mutable_cpu_data(); + Dtype exp2x; + const int count = bottom[0]->count(); + for (int i = 0; i < count; ++i) { + exp2x = exp(2*bottom_data[i]); + top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1)); + } +} + +template +Dtype TanHLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, + vector*>* bottom) { + if (propagate_down) { + const Dtype* bottom_data = (*bottom)[0]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); + const int count = (*bottom)[0]->count(); + Dtype exp2x; + Dtype tanhx; + for (int i = 0; i < count; ++i) { + exp2x = exp(2*bottom_data[i]); + tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1)); + bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx); + } + } + return Dtype(0); +} + +INSTANTIATE_CLASS(TanHLayer); + +} // namespace caffe diff --git a/src/caffe/layers/tanh_layer.cu b/src/caffe/layers/tanh_layer.cu index a309a60..743e314 100644 --- a/src/caffe/layers/tanh_layer.cu +++ b/src/caffe/layers/tanh_layer.cu @@ -11,39 +11,6 @@ namespace caffe { template -void TanHLayer::Forward_cpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - Dtype exp2x; - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - exp2x = exp(2*bottom_data[i]); - top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1)); - } -} - -template -Dtype TanHLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - if (propagate_down) { - const Dtype* bottom_data = (*bottom)[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - const int count = (*bottom)[0]->count(); - Dtype exp2x; - Dtype tanhx; - for (int i = 0; i < count; ++i) { - exp2x = exp(2*bottom_data[i]); - tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1)); - bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx); - } - } - return Dtype(0); -} - -template __global__ void TanHForward(const int n, const Dtype* in, Dtype* out) { int index = threadIdx.x + blockIdx.x * blockDim.x; if (index < n) { -- 2.7.4
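For anyone following the layout this patch establishes: each layer keeps its CPU paths (SetUp where device-independent, Forward_cpu, Backward_cpu) in src/caffe/layers/<name>_layer.cpp and moves the GPU paths (Forward_gpu, Backward_gpu, and any __global__ kernels) into a sibling <name>_layer.cu, so only the .cu files need nvcc while the .cpp files build with an ordinary C++ compiler. A minimal sketch of the pattern, using a hypothetical FooLayer that is not part of this patch (assumed to be declared in caffe/vision_layers.hpp like the layers above, with the old vector<Blob<Dtype>*> Forward/Backward signatures):

// src/caffe/layers/foo_layer.cpp -- CPU implementation, compiled by g++
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void FooLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  for (int i = 0; i < count; ++i) {
    top_data[i] = bottom_data[i];  // identity pass-through, purely illustrative
  }
}

template <typename Dtype>
Dtype FooLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (propagate_down) {
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
    const int count = (*bottom)[0]->count();
    for (int i = 0; i < count; ++i) {
      bottom_diff[i] = top_diff[i];
    }
  }
  return Dtype(0.);
}

INSTANTIATE_CLASS(FooLayer);

}  // namespace caffe

// src/caffe/layers/foo_layer.cu -- GPU implementation, compiled by nvcc
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void FooLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  // Same identity op as the CPU path, done on the device.
  caffe_gpu_copy(bottom[0]->count(), bottom[0]->gpu_data(),
      (*top)[0]->mutable_gpu_data());
}

template <typename Dtype>
Dtype FooLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (propagate_down) {
    caffe_gpu_copy(top[0]->count(), top[0]->gpu_diff(),
        (*bottom)[0]->mutable_gpu_diff());
  }
  return Dtype(0.);
}

INSTANTIATE_CLASS(FooLayer);

}  // namespace caffe

Both translation units end with INSTANTIATE_CLASS here only because that mirrors the .cpp/.cu pairs in the diffs above (e.g. flatten_layer.cpp and flatten_layer.cu).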