const int total_count =
batch_size * output_height * output_width * output_depth;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
D2S_NHWC<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, batch_size,
const int output_width = output.dimension(3);
const int output_depth_by_input_area = output_depth * input_area;
const int total_count = batch_size * output_depth_by_input_area;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
switch (block_size) {
case 2:
// Other block sizes are processed by the generic kernel.
const int total_count = batch_size * input_depth_by_input_area;
+ if (total_count == 0) {
+ return;
+ }
auto config = GetCudaLaunchConfig(total_count, d);
D2S_NCHW<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, input_width,
const int total_count =
batch_size * input_height * input_width * input_depth;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
S2D_NHWC<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, batch_size,
const int input_width = input.dimension(3);
const int input_depth_by_output_area = input_depth * output_area;
const int total_count = batch_size * input_depth_by_output_area;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
switch (block_size) {
case 2:
// Other block sizes are processed by the generic kernel.
const int total_count = batch_size * output_depth_by_output_area;
+ if (total_count == 0) {
+ return;
+ }
CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
S2D_NCHW<<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
config.virtual_thread_count, input.data(), block_size, output_width,
x_out = [batch_output_elt(i) for i in range(batch_size)]
self._testOne(x_np, block_size, x_out)
+ def testBatchSize0(self):
+ # Regression test: depth_to_space with a zero-element batch must build
+ # and evaluate without crashing. This exercises the total_count == 0
+ # early-return guards added to the CUDA kernels in this same change;
+ # previously an empty launch config could be requested for 0 elements.
+ block_size = 2
+ batch_size = 0
+ # 12 input channels -> 3 output channels for block_size 2 (12 / 2**2);
+ # spatial dims grow by block_size: (2, 3) -> (4, 6).
+ input_nhwc = array_ops.ones([batch_size, 2, 3, 12])
+ # Only the shape of x_out is used below; values are irrelevant since
+ # the batch is empty.
+ x_out = array_ops.ones([batch_size, 4, 6, 3])
+
+ with self.test_session(use_gpu=False):
+ # test NHWC (default) on CPU
+ x_tf = array_ops.depth_to_space(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ # Evaluate to make sure the (empty) op actually executes.
+ x_tf.eval()
+ if test.is_gpu_available():
+ with self.test_session(use_gpu=True):
+ # test NHWC (default) on GPU
+ x_tf = array_ops.depth_to_space(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ x_tf.eval()
+
# Tests for different width and height.
def testNonSquare(self):
x_np = [[[[1, 10, 2, 20, 3, 30, 4, 40]],
x_out = [batch_output_elt(i) for i in range(batch_size)]
self._testOne(x_np, block_size, x_out)
+ def testBatchSize0(self):
+ # Regression test: space_to_depth with a zero-element batch must build
+ # and evaluate without crashing. This exercises the total_count == 0
+ # early-return guards added to the CUDA kernels in this same change;
+ # previously an empty launch config could be requested for 0 elements.
+ block_size = 2
+ batch_size = 0
+ # 3 input channels -> 12 output channels for block_size 2 (3 * 2**2);
+ # spatial dims shrink by block_size: (4, 6) -> (2, 3). Inverse of the
+ # depth_to_space case.
+ input_nhwc = array_ops.ones([batch_size, 4, 6, 3])
+ # Only the shape of x_out is used below; values are irrelevant since
+ # the batch is empty.
+ x_out = array_ops.ones([batch_size, 2, 3, 12])
+
+ with self.test_session(use_gpu=False):
+ # test NHWC (default) on CPU
+ x_tf = array_ops.space_to_depth(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ # Evaluate to make sure the (empty) op actually executes.
+ x_tf.eval()
+ if test.is_gpu_available():
+ with self.test_session(use_gpu=True):
+ # test NHWC (default) on GPU
+ x_tf = array_ops.space_to_depth(input_nhwc, block_size)
+ self.assertAllEqual(x_tf.shape, x_out.shape)
+ x_tf.eval()
+
# Tests for different width and height.
def testNonSquare(self):
x_np = [[[[1, 10], [2, 20]], [[3, 30], [4, 40]], [[5, 50], [6, 60]],