From: Evan Shelhamer Date: Sat, 28 Jun 2014 08:52:49 +0000 (-0700) Subject: replace all memset with caffe_set() / caffe_gpu_set() X-Git-Tag: submit/tizen/20180823.020014~653^2~86^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=af624faa55eb75f31de5f612b3d6b1553d652563;p=platform%2Fupstream%2Fcaffeonacl.git replace all memset with caffe_set() / caffe_gpu_set() ...except for `SyncedMem` since it has no type. --- diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 67913bf..963dc68 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -126,11 +126,11 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, vector*>* bottom) { const Dtype* weight = this->blobs_[0]->cpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); - memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); + caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff); Dtype* bias_diff = NULL; if (bias_term_) { bias_diff = this->blobs_[1]->mutable_cpu_diff(); - memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count()); + caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff); } const int weight_offset = M_ * K_; const int col_offset = K_ * N_; diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index 71b00c9..59ec58d 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -48,15 +48,13 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, vector*>* bottom) { const Dtype* weight = this->blobs_[0]->gpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); - CUDA_CHECK(cudaMemset(weight_diff, 0, - sizeof(Dtype) * this->blobs_[0]->count())); + caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff); Dtype* col_data = col_buffer_.mutable_gpu_data(); Dtype* col_diff = col_buffer_.mutable_gpu_diff(); Dtype* bias_diff = NULL; if (bias_term_) { bias_diff = this->blobs_[1]->mutable_gpu_diff(); - CUDA_CHECK(cudaMemset(bias_diff, 0, - sizeof(Dtype) * this->blobs_[1]->count())); + caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff); } const int weight_offset = M_ * K_; const int col_offset = K_ * N_; diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index a86c1d4..2bda043 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -123,7 +123,7 @@ Dtype LRNLayer::CrossChannelForward_cpu( } Blob padded_square(1, channels_ + size_ - 1, height_, width_); Dtype* padded_square_data = padded_square.mutable_cpu_data(); - memset(padded_square_data, 0, sizeof(Dtype) * padded_square.count()); + caffe_set(padded_square.count(), Dtype(0), padded_square_data); Dtype alpha_over_size = alpha_ / size_; // go through the images for (int n = 0; n < num_; ++n) { @@ -201,7 +201,7 @@ void LRNLayer::CrossChannelBackward_cpu( Dtype* accum_ratio_data = accum_ratio.mutable_cpu_data(); // We hack a little bit by using the diff() to store an additional result Dtype* accum_ratio_times_bottom = accum_ratio.mutable_cpu_diff(); - memset(padded_ratio_data, 0, sizeof(Dtype) * padded_ratio.count()); + caffe_set(padded_ratio.count(), Dtype(0), padded_ratio_data); Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_; caffe_powx(scale_.count(), scale_data, -beta_, bottom_diff); @@ -220,7 +220,7 @@ void LRNLayer::CrossChannelBackward_cpu( scale_data + block_offset, padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad)); // Now, compute the accumulated ratios and the bottom diff - memset(accum_ratio_data, 0, sizeof(Dtype) * accum_ratio.count()); + caffe_set(accum_ratio.count(), Dtype(0), accum_ratio_data); for (int c = 0; c < size_ - 1; ++c) { caffe_axpy(height_ * width_, 1., padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data); diff --git a/src/caffe/layers/multinomial_logistic_loss_layer.cpp b/src/caffe/layers/multinomial_logistic_loss_layer.cpp index dd5cae4..8687784 100644 --- a/src/caffe/layers/multinomial_logistic_loss_layer.cpp +++ b/src/caffe/layers/multinomial_logistic_loss_layer.cpp @@ -55,7 +55,7 @@ void MultinomialLogisticLossLayer::Backward_cpu( Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); int num = (*bottom)[0]->num(); int dim = (*bottom)[0]->count() / (*bottom)[0]->num(); - memset(bottom_diff, 0, sizeof(Dtype) * (*bottom)[0]->count()); + caffe_set((*bottom)[0]->count(), Dtype(0), bottom_diff); for (int i = 0; i < num; ++i) { int label = static_cast(bottom_label[i]); Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD)); diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index fd4860f..5dbdff3 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -59,7 +59,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { bool use_square = (crop_mode == "square") ? true : false; // zero out batch - memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count()); + caffe_set(layer->prefetch_data_->count(), Dtype(0), top_data); const int num_fg = static_cast(static_cast(batch_size) * fg_fraction); diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp index 037410e..ce4e188 100644 --- a/src/caffe/util/im2col.cpp +++ b/src/caffe/util/im2col.cpp @@ -5,6 +5,7 @@ #include #include "caffe/util/im2col.hpp" +#include "caffe/util/math_functions.hpp" namespace caffe { @@ -45,7 +46,7 @@ template void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int ksize, const int pad, const int stride, Dtype* data_im) { - memset(data_im, 0, sizeof(Dtype) * height * width * channels); + caffe_set(height * width * channels, Dtype(0), data_im); int height_col = (height + 2 * pad - ksize) / stride + 1; int width_col = (width + 2 * pad - ksize) / stride + 1; int channels_col = channels * ksize * ksize; diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu index ec4465e..79faa6c 100644 --- a/src/caffe/util/im2col.cu +++ b/src/caffe/util/im2col.cu @@ -106,8 +106,6 @@ template void col2im_gpu(const Dtype* data_col, const int channels, const int height, const int width, const int ksize, const int pad, const int stride, Dtype* data_im) { - // CUDA_CHECK(cudaMemset(data_im, 0, - // sizeof(Dtype) * height * width * channels)); int height_col = (height + 2 * pad - ksize) / stride + 1; int width_col = (width + 2 * pad - ksize) / stride + 1; int num_kernels = channels * height * width; diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 63c8fac..849e53b 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -20,27 +20,20 @@ __global__ void set_kernel(const int n, const Dtype alpha, Dtype* y) { } } -template <> -void caffe_gpu_set(const int N, const float alpha, float* Y) { +template +void caffe_gpu_set(const int N, const Dtype alpha, Dtype* Y) { if (alpha == 0) { - CUDA_CHECK(cudaMemset(Y, 0, sizeof(float) * N)); + CUDA_CHECK(cudaMemset(Y, 0, sizeof(Dtype) * N)); return; } // NOLINT_NEXT_LINE(whitespace/operators) - set_kernel<<>>( + set_kernel<<>>( N, alpha, Y); } -template <> -void caffe_gpu_set(const int N, const double alpha, double* Y) { - if (alpha == 0) { - CUDA_CHECK(cudaMemset(Y, 0, sizeof(double) * N)); - return; - } - // NOLINT_NEXT_LINE(whitespace/operators) - set_kernel<<>>( - N, alpha, Y); -} +template void caffe_gpu_set(const int N, const int alpha, int* Y); +template void caffe_gpu_set(const int N, const float alpha, float* Y); +template void caffe_gpu_set(const int N, const double alpha, double* Y); template __global__ void add_scalar_kernel(const int n, const Dtype alpha, Dtype* y) {