Fix DepthToSpace and SpaceToDepth to silently return instead of failing when the...
authorA. Unique TensorFlower <gardener@tensorflow.org>
Sat, 10 Mar 2018 06:43:51 +0000 (22:43 -0800)
committerTensorFlower Gardener <gardener@tensorflow.org>
Sat, 10 Mar 2018 06:47:25 +0000 (22:47 -0800)
PiperOrigin-RevId: 188580972

tensorflow/core/kernels/depthtospace_op_gpu.cu.cc
tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc
tensorflow/python/kernel_tests/depthtospace_op_test.py
tensorflow/python/kernel_tests/spacetodepth_op_test.py

index 7a66285..184c703 100644 (file)
@@ -158,6 +158,9 @@ struct DepthToSpaceOpFunctor<GPUDevice, T, FORMAT_NHWC> {
 
     const int total_count =
         batch_size * output_height * output_width * output_depth;
+    if (total_count == 0) {
+      return;
+    }
     CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
     D2S_NHWC<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
         config.virtual_thread_count, input.data(), block_size, batch_size,
@@ -188,6 +191,9 @@ struct DepthToSpaceOpFunctor<GPUDevice, T, FORMAT_NCHW> {
       const int output_width = output.dimension(3);
       const int output_depth_by_input_area = output_depth * input_area;
       const int total_count = batch_size * output_depth_by_input_area;
+      if (total_count == 0) {
+        return;
+      }
       CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
       switch (block_size) {
         case 2:
@@ -213,6 +219,9 @@ struct DepthToSpaceOpFunctor<GPUDevice, T, FORMAT_NCHW> {
 
     // Other block sizes are processed by the generic kernel.
     const int total_count = batch_size * input_depth_by_input_area;
+    if (total_count == 0) {
+      return;
+    }
     auto config = GetCudaLaunchConfig(total_count, d);
     D2S_NCHW<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
         config.virtual_thread_count, input.data(), block_size, input_width,
index a1a01e8..db05ca1 100644 (file)
@@ -154,6 +154,9 @@ struct SpaceToDepthOpFunctor<GPUDevice, T, FORMAT_NHWC> {
 
     const int total_count =
         batch_size * input_height * input_width * input_depth;
+    if (total_count == 0) {
+      return;
+    }
     CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
     S2D_NHWC<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
         config.virtual_thread_count, input.data(), block_size, batch_size,
@@ -184,6 +187,9 @@ struct SpaceToDepthOpFunctor<GPUDevice, T, FORMAT_NCHW> {
       const int input_width = input.dimension(3);
       const int input_depth_by_output_area = input_depth * output_area;
       const int total_count = batch_size * input_depth_by_output_area;
+      if (total_count == 0) {
+        return;
+      }
       CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
       switch (block_size) {
         case 2:
@@ -209,6 +215,9 @@ struct SpaceToDepthOpFunctor<GPUDevice, T, FORMAT_NCHW> {
 
     // Other block sizes are processed by the generic kernel.
     const int total_count = batch_size * output_depth_by_output_area;
+    if (total_count == 0) {
+      return;
+    }
     CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
     S2D_NCHW<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
         config.virtual_thread_count, input.data(), block_size, output_width,
index 7df2366..96c9718 100644 (file)
@@ -90,6 +90,24 @@ class DepthToSpaceTest(test.TestCase):
     x_out = [batch_output_elt(i) for i in range(batch_size)]
     self._testOne(x_np, block_size, x_out)
 
+  def testBatchSize0(self):
+    block_size = 2
+    batch_size = 0
+    input_nhwc = array_ops.ones([batch_size, 2, 3, 12])
+    x_out = array_ops.ones([batch_size, 4, 6, 3])
+
+    with self.test_session(use_gpu=False):
+      # test NHWC (default) on CPU
+      x_tf = array_ops.depth_to_space(input_nhwc, block_size)
+      self.assertAllEqual(x_tf.shape, x_out.shape)
+      x_tf.eval()
+    if test.is_gpu_available():
+      with self.test_session(use_gpu=True):
+        # test NHWC (default) on GPU
+        x_tf = array_ops.depth_to_space(input_nhwc, block_size)
+        self.assertAllEqual(x_tf.shape, x_out.shape)
+        x_tf.eval()
+
   # Tests for different width and height.
   def testNonSquare(self):
     x_np = [[[[1, 10, 2, 20, 3, 30, 4, 40]],
index 3c98a68..b761357 100644 (file)
@@ -126,6 +126,24 @@ class SpaceToDepthTest(test.TestCase):
     x_out = [batch_output_elt(i) for i in range(batch_size)]
     self._testOne(x_np, block_size, x_out)
 
+  def testBatchSize0(self):
+    block_size = 2
+    batch_size = 0
+    input_nhwc = array_ops.ones([batch_size, 4, 6, 3])
+    x_out = array_ops.ones([batch_size, 2, 3, 12])
+
+    with self.test_session(use_gpu=False):
+      # test NHWC (default) on CPU
+      x_tf = array_ops.space_to_depth(input_nhwc, block_size)
+      self.assertAllEqual(x_tf.shape, x_out.shape)
+      x_tf.eval()
+    if test.is_gpu_available():
+      with self.test_session(use_gpu=True):
+        # test NHWC (default) on GPU
+        x_tf = array_ops.space_to_depth(input_nhwc, block_size)
+        self.assertAllEqual(x_tf.shape, x_out.shape)
+        x_tf.eval()
+
   # Tests for different width and height.
   def testNonSquare(self):
     x_np = [[[[1, 10], [2, 20]], [[3, 30], [4, 40]], [[5, 50], [6, 60]],