const int total_count =
batch_size * output_height * output_width * output_depth;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
D2S_NHWC<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, batch_size,
const int output_width = output.dimension(3);
const int output_depth_by_input_area = output_depth * input_area;
const int total_count = batch_size * output_depth_by_input_area;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
switch (block_size) {
case 2:
// Other block sizes are processed by the generic kernel.
const int total_count = batch_size * input_depth_by_input_area;
+ if (total_count == 0) {
+ return;
+ }
auto config = GetCudaLaunchConfig(total_count, d);
D2S_NCHW<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, input_width,
const int total_count =
batch_size * input_height * input_width * input_depth;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
S2D_NHWC<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, batch_size,
const int input_width = input.dimension(3);
const int input_depth_by_output_area = input_depth * output_area;
const int total_count = batch_size * input_depth_by_output_area;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
switch (block_size) {
case 2:
// Other block sizes are processed by the generic kernel.
const int total_count = batch_size * output_depth_by_output_area;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
S2D_NCHW<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, output_width,
x_out = [batch_output_elt(i) for i in range(batch_size)]
self._testOne(x_np, block_size, x_out)
+ def testBatchSize0(self):
+ # Regression test: depth_to_space with a zero-element batch must build
+ # and evaluate without crashing. This exercises the total_count == 0
+ # early-return guards added to the CUDA kernels in this same change;
+ # previously an empty launch config could be requested for 0 elements.
+ block_size = 2
+ batch_size = 0
+ # 12 input channels -> 3 output channels for block_size 2 (12 / 2**2);
+ # spatial dims grow by block_size: (2, 3) -> (4, 6).
+ input_nhwc = array_ops.ones([batch_size, 2, 3, 12])
+ # Only the shape of x_out is used below; values are irrelevant since
+ # the batch is empty.
+ x_out = array_ops.ones([batch_size, 4, 6, 3])
+
+ with self.test_session(use_gpu=False):
+ # test NHWC (default) on CPU
+ x_tf = array_ops.depth_to_space(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ # Evaluate to make sure the (empty) op actually executes.
+ x_tf.eval()
+ if test.is_gpu_available():
+ with self.test_session(use_gpu=True):
+ # test NHWC (default) on GPU
+ x_tf = array_ops.depth_to_space(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ x_tf.eval()
+
# Tests for different width and height.
def testNonSquare(self):
x_np = [[[[1, 10, 2, 20, 3, 30, 4, 40]],
x_out = [batch_output_elt(i) for i in range(batch_size)]
self._testOne(x_np, block_size, x_out)
+ def testBatchSize0(self):
+ # Regression test: space_to_depth with a zero-element batch must build
+ # and evaluate without crashing. This exercises the total_count == 0
+ # early-return guards added to the CUDA kernels in this same change;
+ # previously an empty launch config could be requested for 0 elements.
+ block_size = 2
+ batch_size = 0
+ # 3 input channels -> 12 output channels for block_size 2 (3 * 2**2);
+ # spatial dims shrink by block_size: (4, 6) -> (2, 3). Inverse of the
+ # depth_to_space case.
+ input_nhwc = array_ops.ones([batch_size, 4, 6, 3])
+ # Only the shape of x_out is used below; values are irrelevant since
+ # the batch is empty.
+ x_out = array_ops.ones([batch_size, 2, 3, 12])
+
+ with self.test_session(use_gpu=False):
+ # test NHWC (default) on CPU
+ x_tf = array_ops.space_to_depth(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ # Evaluate to make sure the (empty) op actually executes.
+ x_tf.eval()
+ if test.is_gpu_available():
+ with self.test_session(use_gpu=True):
+ # test NHWC (default) on GPU
+ x_tf = array_ops.space_to_depth(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ x_tf.eval()
+
# Tests for different width and height.
def testNonSquare(self):
x_np = [[[[1, 10], [2, 20]], [[3, 30], [4, 40]], [[5, 50], [6, 60]],