const int dim = bottom[0]->count() / outer_num_;
const int num_labels = bottom[0]->shape(label_axis_);
const int nthreads = outer_num_ * inner_num_;
- // Since this memory is not used for anything,
- // we use it here to avoid having to allocate new GPU
- // memory to accumulate intermediate results in the kernel.
+ // The bottom diff is not otherwise needed here, so we borrow it as scratch
+ // space instead of allocating new GPU memory for intermediate results.
Dtype* acc_data = bottom[0]->mutable_gpu_diff();
if (top.size() == 1) {
// simple case - report only global accuracy.
}
}
}
+ // Clear scratch memory to prevent interfering with backward (see #6202).
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
}
// Stable version of loss computation from input data
const Dtype* input_data = bottom[0]->gpu_data();
const Dtype* target = bottom[1]->gpu_data();
- // Since this memory is not used for anything until it is overwritten
- // on the backward pass, we use it here to avoid having to allocate new GPU
- // memory to accumulate intermediate results in the kernel.
+ // The bottom diffs are not otherwise needed here, so we borrow them as
+ // scratch space instead of allocating new GPU memory for intermediate results.
Dtype* loss_data = bottom[0]->mutable_gpu_diff();
Dtype* count_data = bottom[1]->mutable_gpu_diff();
Dtype valid_count;
normalizer_ = get_normalizer(normalization_, valid_count);
top[0]->mutable_cpu_data()[0] = loss / normalizer_;
+ // Clear scratch memory to prevent interfering with backward (see #6202).
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
caffe_gpu_set(bottom[1]->count(), Dtype(0), bottom[1]->mutable_gpu_diff());
}
const Dtype* label = bottom[1]->gpu_data();
const int dim = prob_.count() / outer_num_;
const int nthreads = outer_num_ * inner_num_;
- // Since this memory is not used for anything until it is overwritten
- // on the backward pass, we use it here to avoid having to allocate new GPU
- // memory to accumulate intermediate results in the kernel.
+ // The bottom diff is not otherwise needed here, so we borrow it as scratch
+ // space instead of allocating new GPU memory for intermediate results.
Dtype* loss_data = bottom[0]->mutable_gpu_diff();
// Similarly, this memory is never used elsewhere, and thus we can use it
// to avoid having to allocate additional GPU memory.
top[1]->ShareData(prob_);
}
+ // Clear scratch memory to prevent interfering with backward (see #6202).
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
}