Revert D30526034: [pytorch][PR] compute reduction intermediate buffer size in elements
author: Alban Desmaison <albandes@fb.com>
Wed, 25 Aug 2021 14:15:18 +0000 (07:15 -0700)
committer: Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Wed, 25 Aug 2021 14:17:22 +0000 (07:17 -0700)
Test Plan: revert-hammer

Differential Revision:
D30526034 (https://github.com/pytorch/pytorch/commit/e69a1398cbe534874060460faf36af21d24ce6e7)

Original commit changeset: 0aca7f887974

fbshipit-source-id: a22472723818d6fe0c11a6e134080df1ac408038

aten/src/ATen/native/cuda/Reduce.cuh

index 161a896..8c42306 100644 (file)
@@ -923,7 +923,6 @@ inline void gpu_reduce_kernel(TensorIterator& iter, const ops_t& ops, ident_t id
       for (int dim = 0; dim < iter.ndim(); dim++) {
         output_memory_size = std::max(output_memory_size, iter.shape()[dim] * iter.strides(0)[dim]);
       }
-      output_memory_size /= iter.element_size(0); //iter.strides is in bytes
       owned_buf_ptr.reset(new AccumulationBuffer(sizeof(arg_t),
                                                  sizeof(out_scalar_t),
                                                  (char*) iter.data_ptr(0),