Dtype* scale_data = scale_.mutable_gpu_data();
int num = bottom[0]->num();
int dim = bottom[0]->count() / bottom[0]->num();
- CUDA_CHECK(cudaMemcpy(top_data, bottom_data,
- sizeof(Dtype) * bottom[0]->count(), cudaMemcpyDeviceToDevice));
+ caffe_gpu_copy(bottom[0]->count(), bottom_data, top_data);
// we need to subtract the max to avoid numerical issues, compute the exp,
// and then normalize.
// Compute max
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
int num = top[0]->num();
int dim = top[0]->count() / top[0]->num();
- CUDA_CHECK(cudaMemcpy(bottom_diff, top_diff,
- sizeof(Dtype) * top[0]->count(), cudaMemcpyDeviceToDevice));
+ caffe_gpu_copy(top[0]->count(), top_diff, bottom_diff);
// Compute inner1d(top_diff, top_data) and subtract them from the bottom diff
// cuda dot returns the result to cpu, so we temporarily change the pointer
// mode