initialize with ident value in global reduction (#15653)
author: Natalia Gimelshein <ngimelshein@nvidia.com>
Thu, 3 Jan 2019 03:50:19 +0000 (19:50 -0800)
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Thu, 3 Jan 2019 03:52:57 +0000 (19:52 -0800)
Summary:
Fixes #15647. cc colesbury.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15653

Differential Revision: D13571132

Pulled By: soumith

fbshipit-source-id: 8f25943c974b3b931f4528e0e0a370bc095dab51

aten/src/ATen/native/cuda/Reduce.cuh
test/test_cuda.py

index 6e870ad..db26a17 100644 (file)
@@ -402,7 +402,7 @@ struct ReduceOp {
     bool is_last_block_done = mark_block_finished();
 
     if (is_last_block_done) {
-      value = arg_t {};
+      value = ident;
       if (config.should_warp_reduce()) {
         index_t input_offset = threadIdx.x + threadIdx.y * blockDim.x;
         index_t step = blockDim.x * blockDim.y;
index 28ccc6a..9ea50ce 100644 (file)
@@ -1574,6 +1574,11 @@ class TestCuda(TestCase):
         x = torch.ones(65536, device='cuda', dtype=torch.float16)
         self.assertEqual(x.mean(dtype=torch.float32), 1)
 
+    def test_prod_large(self):
+        # tests global reduction (should_global_reduce = true) in case of non-zero identity element
+        x = torch.ones(240000, device='cuda', dtype=torch.float32)
+        self.assertEqual(x.prod(), 1)
+
     @staticmethod
     def _select_broadcastable_dims(dims_full=None):
         return _TestTorchMixin._select_broadcastable_dims(dims_full)