...except for `SyncedMem` since it has no type.
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* weight = this->blobs_[0]->cpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
- memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
+ caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
Dtype* bias_diff = NULL;
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_cpu_diff();
- memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
+ caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
}
const int weight_offset = M_ * K_;
const int col_offset = K_ * N_;
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* weight = this->blobs_[0]->gpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
- CUDA_CHECK(cudaMemset(weight_diff, 0,
- sizeof(Dtype) * this->blobs_[0]->count()));
+ caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
Dtype* col_data = col_buffer_.mutable_gpu_data();
Dtype* col_diff = col_buffer_.mutable_gpu_diff();
Dtype* bias_diff = NULL;
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_gpu_diff();
- CUDA_CHECK(cudaMemset(bias_diff, 0,
- sizeof(Dtype) * this->blobs_[1]->count()));
+ caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
}
const int weight_offset = M_ * K_;
const int col_offset = K_ * N_;
}
Blob<Dtype> padded_square(1, channels_ + size_ - 1, height_, width_);
Dtype* padded_square_data = padded_square.mutable_cpu_data();
- memset(padded_square_data, 0, sizeof(Dtype) * padded_square.count());
+ caffe_set(padded_square.count(), Dtype(0), padded_square_data);
Dtype alpha_over_size = alpha_ / size_;
// go through the images
for (int n = 0; n < num_; ++n) {
Dtype* accum_ratio_data = accum_ratio.mutable_cpu_data();
// We hack a little bit by using the diff() to store an additional result
Dtype* accum_ratio_times_bottom = accum_ratio.mutable_cpu_diff();
- memset(padded_ratio_data, 0, sizeof(Dtype) * padded_ratio.count());
+ caffe_set(padded_ratio.count(), Dtype(0), padded_ratio_data);
Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_;
caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, bottom_diff);
scale_data + block_offset,
padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad));
// Now, compute the accumulated ratios and the bottom diff
- memset(accum_ratio_data, 0, sizeof(Dtype) * accum_ratio.count());
+ caffe_set(accum_ratio.count(), Dtype(0), accum_ratio_data);
for (int c = 0; c < size_ - 1; ++c) {
caffe_axpy<Dtype>(height_ * width_, 1.,
padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data);
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
int num = (*bottom)[0]->num();
int dim = (*bottom)[0]->count() / (*bottom)[0]->num();
- memset(bottom_diff, 0, sizeof(Dtype) * (*bottom)[0]->count());
+ caffe_set((*bottom)[0]->count(), Dtype(0), bottom_diff);
for (int i = 0; i < num; ++i) {
int label = static_cast<int>(bottom_label[i]);
Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD));
bool use_square = (crop_mode == "square") ? true : false;
// zero out batch
- memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count());
+ caffe_set(layer->prefetch_data_->count(), Dtype(0), top_data);
const int num_fg = static_cast<int>(static_cast<float>(batch_size)
* fg_fraction);
#include <cstring>
#include "caffe/util/im2col.hpp"
+#include "caffe/util/math_functions.hpp"
namespace caffe {
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int ksize, const int pad,
const int stride, Dtype* data_im) {
- memset(data_im, 0, sizeof(Dtype) * height * width * channels);
+ caffe_set(height * width * channels, Dtype(0), data_im);
int height_col = (height + 2 * pad - ksize) / stride + 1;
int width_col = (width + 2 * pad - ksize) / stride + 1;
int channels_col = channels * ksize * ksize;
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int ksize, const int pad,
const int stride, Dtype* data_im) {
- // CUDA_CHECK(cudaMemset(data_im, 0,
- // sizeof(Dtype) * height * width * channels));
int height_col = (height + 2 * pad - ksize) / stride + 1;
int width_col = (width + 2 * pad - ksize) / stride + 1;
int num_kernels = channels * height * width;
}
}
-template <>
-void caffe_gpu_set(const int N, const float alpha, float* Y) {
+// Fill the first N elements of the device array Y with the scalar alpha.
+// Templated on Dtype so one definition serves int, float and double
+// (previously two near-identical specializations for float and double).
+template <typename Dtype>
+void caffe_gpu_set(const int N, const Dtype alpha, Dtype* Y) {
  if (alpha == 0) {
-    CUDA_CHECK(cudaMemset(Y, 0, sizeof(float) * N));
+    // Fast path: zero-fill via cudaMemset. Correct only because an
+    // all-zero byte pattern represents 0 for every instantiated Dtype
+    // (int, and IEEE-754 float/double).
+    CUDA_CHECK(cudaMemset(Y, 0, sizeof(Dtype) * N));
    return;
  }
+  // Non-zero alpha: launch a simple element-wise set kernel.
  // NOLINT_NEXT_LINE(whitespace/operators)
-  set_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+  set_kernel<Dtype><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
      N, alpha, Y);
}
-template <>
-void caffe_gpu_set(const int N, const double alpha, double* Y) {
-  if (alpha == 0) {
-    CUDA_CHECK(cudaMemset(Y, 0, sizeof(double) * N));
-    return;
-  }
-  // NOLINT_NEXT_LINE(whitespace/operators)
-  set_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
-    N, alpha, Y);
-}
+// Explicit instantiations for the element types Caffe uses; keep in sync
+// with callers (blobs are float/double; int is used for index buffers).
+template void caffe_gpu_set<int>(const int N, const int alpha, int* Y);
+template void caffe_gpu_set<float>(const int N, const float alpha, float* Y);
+template void caffe_gpu_set<double>(const int N, const double alpha, double* Y);
template <typename Dtype>
__global__ void add_scalar_kernel(const int n, const Dtype alpha, Dtype* y) {