From: Yangqing Jia
Date: Mon, 16 Sep 2013 20:38:35 +0000 (-0700)
Subject: working halfway into dropout, machine down, changing machine
X-Git-Tag: submit/tizen/20180823.020014~1070
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=002e004a6b3760016f28a189c458a66e0e574852;p=platform%2Fupstream%2Fcaffeonacl.git

working halfway into dropout, machine down, changing machine
---

diff --git a/src/Makefile b/src/Makefile
index f3c8391..9ab43e5 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -34,7 +34,7 @@ LIBRARY_DIRS := . /usr/local/lib $(CUDA_LIB_DIR) $(MKL_LIB_DIR)
 LIBRARIES := cuda cudart cublas protobuf glog mkl_rt mkl_intel_thread
 WARNINGS := -Wall
 
-CXXFLAGS += -fPIC $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
+CXXFLAGS += -fPIC -O2 $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
 LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir))
 LDFLAGS += $(foreach library,$(LIBRARIES),-l$(library))
 
@@ -53,8 +53,8 @@ $(TEST_NAME): $(OBJS) $(TEST_OBJS)
 $(NAME): $(PROTO_GEN_CC) $(OBJS)
 	$(LINK) -shared $(OBJS) -o $(NAME)
 
-$(CU_OBJS): $(CU_SRCS)
-	$(NVCC) -c -o $(CU_OBJS) $(CU_SRCS)
+$(CU_OBJS): %.o: %.cu
+	$(NVCC) -c $< -o $@
 
 $(PROTO_GEN_CC): $(PROTO_SRCS)
 	protoc $(PROTO_SRCS) --cpp_out=. --python_out=.
diff --git a/src/caffeine/dropout_layer.cu b/src/caffeine/dropout_layer.cu
new file mode 100644
index 0000000..23999fb
--- /dev/null
+++ b/src/caffeine/dropout_layer.cu
@@ -0,0 +1,101 @@
+#include "caffeine/layer.hpp"
+#include "caffeine/vision_layers.hpp"
+#include <algorithm>
+
+using std::max;
+
+namespace caffeine {
+
+template <typename Dtype>
+void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  NeuronLayer<Dtype>::SetUp(bottom, top);
+  // Set up the cache for random number generation
+  rand_mat_.reset(new Blob<Dtype>(bottom[0]->num(), bottom[0]->channels(),
+      bottom[0]->height(), bottom[0]->width()));
+  filler_.reset(new UniformFiller<Dtype>(FillerParameter()));
+}
+
+template <typename Dtype>
+void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  // First, create the random matrix
+  filler_->Fill(rand_mat_.get());
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  const Dtype* rand_vals = rand_mat_->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  float threshold = layer_param_->dropout_ratio();
+  // Inverted dropout: scale the kept units by 1 / (1 - ratio) so the
+  // expected output matches the input.
+  float scale = 1. / (1. - threshold);
+  const int count = bottom[0]->count();
+  for (int i = 0; i < count; ++i) {
+    // A unit is kept iff its uniform random draw exceeds the dropout ratio.
+    top_data[i] = bottom_data[i] * (rand_vals[i] > threshold) * scale;
+  }
+}
+
+template <typename Dtype>
+Dtype DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    // Reuse the random matrix from the forward pass: gradients flow only
+    // through the units that were kept, with the same scaling.
+    const Dtype* rand_vals = rand_mat_->cpu_data();
+    const Dtype* top_diff = top[0]->cpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+    float threshold = layer_param_->dropout_ratio();
+    float scale = 1. / (1. - threshold);
+    const int count = (*bottom)[0]->count();
+    for (int i = 0; i < count; ++i) {
+      bottom_diff[i] = top_diff[i] * (rand_vals[i] > threshold) * scale;
+    }
+  }
+  return Dtype(0);
+}
+
+template <typename Dtype>
+__global__ void DropoutForward(const int n, const Dtype* in,
+    const Dtype* rand_vals, const float threshold, const float scale,
+    Dtype* out) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    out[index] = in[index] * (rand_vals[index] > threshold) * scale;
+  }
+}
+
+template <typename Dtype>
+void DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  // Fill the random matrix on the host; gpu_data() syncs it to the device.
+  filler_->Fill(rand_mat_.get());
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  const Dtype* rand_vals = rand_mat_->gpu_data();
+  Dtype* top_data = (*top)[0]->mutable_gpu_data();
+  float threshold = layer_param_->dropout_ratio();
+  float scale = 1. / (1. - threshold);
+  const int count = bottom[0]->count();
+  const int blocks = (count + CAFFEINE_CUDA_NUM_THREADS - 1) /
+      CAFFEINE_CUDA_NUM_THREADS;
+  DropoutForward<Dtype><<<blocks, CAFFEINE_CUDA_NUM_THREADS>>>(
+      count, bottom_data, rand_vals, threshold, scale, top_data);
+}
+
+template <typename Dtype>
+__global__ void DropoutBackward(const int n, const Dtype* in_diff,
+    const Dtype* rand_vals, const float threshold, const float scale,
+    Dtype* out_diff) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    out_diff[index] = in_diff[index] * (rand_vals[index] > threshold) * scale;
+  }
+}
+
+template <typename Dtype>
+Dtype DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    const Dtype* rand_vals = rand_mat_->gpu_data();
+    const Dtype* top_diff = top[0]->gpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+    float threshold = layer_param_->dropout_ratio();
+    float scale = 1. / (1. - threshold);
+    const int count = (*bottom)[0]->count();
+    const int blocks = (count + CAFFEINE_CUDA_NUM_THREADS - 1) /
+        CAFFEINE_CUDA_NUM_THREADS;
+    DropoutBackward<Dtype><<<blocks, CAFFEINE_CUDA_NUM_THREADS>>>(
+        count, top_diff, rand_vals, threshold, scale, bottom_diff);
+  }
+  return Dtype(0);
+}
+
+template class DropoutLayer<float>;
+template class DropoutLayer<double>;
+
+}  // namespace caffeine
diff --git a/src/caffeine/neuron_layer.cpp b/src/caffeine/neuron_layer.cpp
new file mode 100644
index 0000000..050c690
--- /dev/null
+++ b/src/caffeine/neuron_layer.cpp
@@ -0,0 +1,18 @@
+#include "caffeine/layer.hpp"
+#include "caffeine/vision_layers.hpp"
+
+namespace caffeine {
+
+template <typename Dtype>
+void NeuronLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 1) << "Neuron Layer takes a single blob as input.";
+  CHECK_EQ(top->size(), 1) << "Neuron Layer takes a single blob as output.";
+  (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+      bottom[0]->height(), bottom[0]->width());
+}
+
+template class NeuronLayer<float>;
+template class NeuronLayer<double>;
+
+}  // namespace caffeine
diff --git a/src/caffeine/proto/layer_param.proto b/src/caffeine/proto/layer_param.proto
index 1ad42d2..7bb3708 100644
--- a/src/caffeine/proto/layer_param.proto
+++ b/src/caffeine/proto/layer_param.proto
@@ -1,8 +1,24 @@
 package caffeine;
 
 message LayerParameter {
-  required string name = 1;
-  required string type = 2;
+  required string name = 1;  // the layer name
+  required string type = 2;  // the string to specify the layer type
+
+  // Parameters to specify layers with inner products.
+  optional int32 num_output = 3;  // The number of outputs for the layer
+  optional bool biasterm = 4 [default = true];  // whether to have bias terms
+  optional FillerParameter weight_filler = 5;  // The filler for the weight
+  optional FillerParameter bias_filler = 6;  // The filler for the bias
+
+  optional uint32 pad = 7 [default = 0];  // The padding size
+  optional uint32 kernelsize = 8;  // The kernel size
+  optional uint32 group = 9 [default = 1];  // The group size for group conv
+  optional uint32 stride = 10 [default = 1];  // The stride
+  optional string pool = 11 [default = 'max'];  // The pooling method
+  optional float dropout_ratio = 12 [default = 0.5];  // dropout ratio
+
+  optional float alpha = 13 [default = 1.];  // for local response norm
+  optional float beta = 14 [default = 0.75];  // for local response norm
 }
 
 message FillerParameter {
@@ -21,4 +37,4 @@ message BlobProto {
   optional int32 channels = 4 [default = 0];
   repeated float data = 5;
   repeated float diff = 6;
-}
\ No newline at end of file
+}
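As a usage sketch of the new proto fields (not part of the commit): protoc generates set_* accessors for the optional fields above, and the layer is constructed from the resulting message. The "dropout" type string, the blob shape, and the "caffeine/blob.hpp" path are illustrative assumptions.

    #include <vector>
    #include "caffeine/blob.hpp"                // assumed header for Blob
    #include "caffeine/vision_layers.hpp"
    #include "caffeine/proto/layer_param.pb.h"  // generated from layer_param.proto

    using namespace caffeine;

    int main() {
      // Configure the layer through the protobuf message defined above.
      LayerParameter param;
      param.set_name("drop1");       // arbitrary layer name
      param.set_type("dropout");     // hypothetical type string
      param.set_dropout_ratio(0.5);  // matches the proto default

      // A single input/output blob pair, as NeuronLayer::SetUp requires.
      Blob<float> input(2, 3, 4, 4);  // illustrative shape
      Blob<float> output;             // assumes Blob has a default constructor
      std::vector<Blob<float>*> bottom(1, &input);
      std::vector<Blob<float>*> top(1, &output);

      DropoutLayer<float> layer(param);
      layer.SetUp(bottom, &top);  // reshapes output, allocates the rand cache
      return 0;
    }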
diff --git a/src/caffeine/neuron_layer.cu b/src/caffeine/relu_layer.cu
similarity index 85%
rename from src/caffeine/neuron_layer.cu
rename to src/caffeine/relu_layer.cu
index 2801248..158131a 100644
--- a/src/caffeine/neuron_layer.cu
+++ b/src/caffeine/relu_layer.cu
@@ -7,18 +7,6 @@ using std::max;
 namespace caffeine {
 
 template <typename Dtype>
-void NeuronLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
-      vector<Blob<Dtype>*>* top) {
-  CHECK_EQ(bottom.size(), 1) << "Neuron Layer takes a single blob as input.";
-  CHECK_EQ(top->size(), 1) << "Neuron Layer takes a single blob as output.";
-  (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
-      bottom[0]->height(), bottom[0]->width());
-};
-
-template class NeuronLayer<float>;
-template class NeuronLayer<double>;
-
-template <typename Dtype>
 void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
   const Dtype* bottom_data = bottom[0]->cpu_data();
diff --git a/src/caffeine/vision_layers.hpp b/src/caffeine/vision_layers.hpp
index b2f4926..08561bc 100644
--- a/src/caffeine/vision_layers.hpp
+++ b/src/caffeine/vision_layers.hpp
@@ -31,6 +31,32 @@ class ReLULayer : public NeuronLayer<Dtype> {
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
 };
 
+template <typename Dtype>
+class DropoutLayer : public NeuronLayer<Dtype> {
+ public:
+  explicit DropoutLayer(const LayerParameter& param)
+      : NeuronLayer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+
+ private:
+  shared_ptr<Blob<Dtype> > rand_mat_;
+  shared_ptr<UniformFiller<Dtype> > filler_;
+};
+
 } // namespace caffeine
 
 #endif  // CAFFEINE_VISION_LAYERS_HPP_
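The scale used to complete the half-written lines in dropout_layer.cu is the standard inverted-dropout rule: each unit survives with probability 1 - dropout_ratio, so multiplying survivors by 1 / (1 - dropout_ratio) keeps the expected activation unchanged and lets test-time code use the activations as-is. A self-contained sanity check of that identity (illustrative only, not part of the commit):

    #include <cstdio>
    #include <cstdlib>

    int main() {
      const float ratio = 0.5f;  // the proto default dropout_ratio
      const float scale = 1.0f / (1.0f - ratio);
      const int n = 1000000;
      double sum_out = 0.0;
      for (int i = 0; i < n; ++i) {
        // Uniform draw in [0, 1], standing in for UniformFiller.
        float r = static_cast<float>(rand()) / RAND_MAX;
        // The forward rule: keep the unit iff r > ratio, then rescale.
        sum_out += 1.0f * (r > ratio) * scale;
      }
      // The mean output stays near 1.0, matching the constant input of 1.0.
      printf("mean output: %f\n", sum_out / n);
      return 0;
    }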