}
} else {
// We are at the last dimension, which is stored contiguously in memory
- for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
- // prepare index vector reduced(red) and with offsets(off)
- std::vector<int> ind_red(cur_dim, 0);
- std::vector<int> ind_off(cur_dim+1, 0);
- for (int j = 0; j < cur_dim; ++j) {
- ind_red[j] = indices[j];
- ind_off[j] = indices[j] + offsets[j];
- }
- ind_off[cur_dim] = offsets[cur_dim];
- // do the copy
- if (is_forward) {
- caffe_copy(top[0]->shape(cur_dim),
- src_data + bottom[0]->offset(ind_off),
- dest_data + top[0]->offset(ind_red));
- } else {
- // in the backwards pass the src_data is top_diff
- // and the dest_data is bottom_diff
- caffe_copy(top[0]->shape(cur_dim),
- src_data + top[0]->offset(ind_red),
- dest_data + bottom[0]->offset(ind_off));
- }
+ // prepare index vector reduced(red) and with offsets(off)
+ std::vector<int> ind_red(cur_dim, 0);
+ std::vector<int> ind_off(cur_dim+1, 0);
+ for (int j = 0; j < cur_dim; ++j) {
+ ind_red[j] = indices[j];
+ ind_off[j] = indices[j] + offsets[j];
+ }
+ ind_off[cur_dim] = offsets[cur_dim];
+ // do the copy
+ if (is_forward) {
+ caffe_copy(top[0]->shape(cur_dim),
+ src_data + bottom[0]->offset(ind_off),
+ dest_data + top[0]->offset(ind_red));
+ } else {
+ // in the backwards pass the src_data is top_diff
+ // and the dest_data is bottom_diff
+ caffe_copy(top[0]->shape(cur_dim),
+ src_data + top[0]->offset(ind_red),
+ dest_data + bottom[0]->offset(ind_off));
}
}
}
// strides in the last two dimensions.
template <typename Dtype>
__global__ void copy_kernel(const int n, const int height, const int width,
- const int src_outer_stride, const int src_inner_stride,
- const int dest_outer_stride, const int dest_inner_stride,
+ const int src_inner_stride,
+ const int dest_inner_stride,
const Dtype* src, Dtype* dest) {
CUDA_KERNEL_LOOP(index, n) {
- int src_start = index / height * src_outer_stride
- + index % height * src_inner_stride;
- int dest_start = index / height * dest_outer_stride
- + index % height * dest_inner_stride;
+ int src_start = index * src_inner_stride;
+ int dest_start = index * dest_inner_stride;
for (int i = 0; i < width; ++i) {
dest[dest_start + i] = src[src_start + i];
}
ind_off[cur_dim] = offsets[cur_dim];
ind_off[cur_dim+1] = offsets[cur_dim+1];
// Compute copy strides
- const int src_outer_stride =
- bottom[0]->shape(cur_dim)*bottom[0]->shape(cur_dim+1);
const int src_inner_stride = bottom[0]->shape(cur_dim+1);
- const int dest_outer_stride =
- top[0]->shape(cur_dim)*top[0]->shape(cur_dim+1);
const int dest_inner_stride = top[0]->shape(cur_dim+1);
if (is_forward) {
// NOLINT_NEXT_LINE(whitespace/operators)
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
lines, height, width,
- src_outer_stride, src_inner_stride,
- dest_outer_stride, dest_inner_stride,
+ src_inner_stride,
+ dest_inner_stride,
bottom_data, top_data);
} else {
// NOLINT_NEXT_LINE(whitespace/operators)
copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
lines, height, width,
- dest_outer_stride, dest_inner_stride,
- src_outer_stride, src_inner_stride,
+ dest_inner_stride,
+ src_inner_stride,
top_diff, bottom_diff);
}
}