initialize with ident value in global reduction (#15653)
author: Natalia Gimelshein <ngimelshein@nvidia.com>
Thu, 3 Jan 2019 03:50:19 +0000 (19:50 -0800)
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Thu, 3 Jan 2019 03:52:57 +0000 (19:52 -0800)
Summary:
Fixes #15647. cc colesbury.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15653

Differential Revision: D13571132

Pulled By: soumith

fbshipit-source-id: 8f25943c974b3b931f4528e0e0a370bc095dab51

aten/src/ATen/native/cuda/Reduce.cuh
test/test_cuda.py

index 6e870ad..db26a17 100644 (file)
@@ -402,7 +402,7 @@ struct ReduceOp {
     bool is_last_block_done = mark_block_finished();
 
     if (is_last_block_done) {
-      value = arg_t {};
+      value = ident;
       if (config.should_warp_reduce()) {
         index_t input_offset = threadIdx.x + threadIdx.y * blockDim.x;
         index_t step = blockDim.x * blockDim.y;
index 28ccc6a..9ea50ce 100644 (file)
@@ -1574,6 +1574,11 @@ class TestCuda(TestCase):
         x = torch.ones(65536, device='cuda', dtype=torch.float16)
         self.assertEqual(x.mean(dtype=torch.float32), 1)
 
+    def test_prod_large(self):
+        # tests global reduction (should_global_reduce = true) in case of non-zero identity element
+        x = torch.ones(240000, device='cuda', dtype=torch.float32)
+        self.assertEqual(x.prod(), 1)
+
     @staticmethod
     def _select_broadcastable_dims(dims_full=None):
         return _TestTorchMixin._select_broadcastable_dims(dims_full)