@autotvm.register_topi_compute("conv3d_transpose_ncdhw.cuda")
-def conv3d_transpose_ncdhw(cfg, data, kernel, stride, padding, out_dtype):
+def conv3d_transpose_ncdhw(cfg, data, kernel, stride, padding, out_dtype,
+ output_padding):
"""Transposed 3D convolution ncdhw forward operator.
Parameters
Padding size, or ['VALID', 'SAME']
out_dtype: str
The output type. This is used in mixed precision
+ output_padding : tuple of three ints
+ Used to disambiguate output shape
Returns
-------
batch, inp_channels, inp_depth, inp_height, inp_width = get_const_tuple(data.shape)
_, out_channels, kernel_depth, kernel_height, kernel_width = get_const_tuple(kernel.shape)
stride_depth, stride_height, stride_width = stride
+ outpad_depth, outpad_height, outpad_width = output_padding
+ assert (outpad_height < stride_height and outpad_width < stride_width and
+ outpad_depth < stride_depth)
cfg.stride = stride
pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = nn.get_pad_tuple3d(
padding, (kernel_depth, kernel_height, kernel_width))
out_depth = (inp_depth - 1) * stride_depth + \
- kernel_depth - pad_front - pad_back
+ kernel_depth - pad_front - pad_back + outpad_depth
pad_front = kernel_depth - 1 - pad_front
- pad_back = kernel_depth - 1 - pad_back
+ # NOTE(review): the back/bottom/right pads applied to the dilated input
+ # must also grow by output_padding, otherwise the conv region only covers
+ # stride*(n-1) + k - pad_a - pad_b elements and the enlarged out_depth/
+ # out_height/out_width reads out of bounds. This mirrors the generic
+ # implementation, which adds opad_* to bpad_back/bpad_bottom/bpad_right.
+ pad_back = kernel_depth - 1 - pad_back + outpad_depth
dilated_depth = stride_depth * (inp_depth - 1) + 1
out_width = (inp_width - 1) * stride_width + \
- kernel_width - pad_left - pad_right
+ kernel_width - pad_left - pad_right + outpad_width
pad_left = kernel_width - 1 - pad_left
- pad_right = kernel_width - 1 - pad_right
+ pad_right = kernel_width - 1 - pad_right + outpad_width
dilated_width = stride_width * (inp_width - 1) + 1
out_height = (inp_height - 1) * stride_height + \
- kernel_height - pad_top - pad_bottom
+ kernel_height - pad_top - pad_bottom + outpad_height
pad_top = kernel_height - 1 - pad_top
- pad_bottom = kernel_height - 1 - pad_bottom
+ pad_bottom = kernel_height - 1 - pad_bottom + outpad_height
dilated_height = stride_height * (inp_height - 1) + 1
from ..util import simplify
def conv3d_transpose_ncdhw(Input, Filter, strides, padding, out_dtype, output_padding):
    """Transposed 3D convolution ncdhw forward operator.

    Parameters
    ----------
    Input : tvm.te.Tensor
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    Filter : tvm.te.Tensor
        5-D with shape [in_channel, num_filter, filter_depth, filter_height, filter_width]

    strides : tuple of three ints
        The spatial stride along depth, height and width

    padding : int or str
        Padding size, or ['VALID', 'SAME']

    out_dtype : str
        The output data type. This is used for mixed precision.

    output_padding : tuple of three ints
        Used to get the right output shape for gradients

    Returns
    -------
    Output : tvm.te.Tensor
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]
    """
    # Thin wrapper: all work (dilation, padding, kernel transform, conv) is
    # done by the shared implementation helper.
    return declaration_conv3d_transpose_impl(Input, Filter, strides, padding,
                                             out_dtype, output_padding)
-def conv3d_transpose_ncdhw_preprocess(data, kernel, strides, padding, out_dtype):
+def conv3d_transpose_ncdhw_preprocess(data, kernel, strides, padding, out_dtype, output_padding):
"""Preprocess data and kernel to make the compute pattern
of conv3d_transpose the same as conv3d"""
batch, in_c, in_d, in_h, in_w = data.shape
_, out_c, filter_d, filter_h, filter_w = kernel.shape
stride_d, stride_h, stride_w = strides
+ opad_d, opad_h, opad_w = output_padding
+ assert opad_d < stride_d and opad_h < stride_h and opad_w < stride_w
# dilate data
data_dilate = dilate(data, [1, 1, stride_d, stride_h, stride_w], name='data_dilate')
# pad data
fpad_front, fpad_top, fpad_left, fpad_back, fpad_bottom, fpad_right = get_pad_tuple3d(
padding, (filter_d, filter_h, filter_w))
bpad_front = filter_d - 1 - fpad_front
- bpad_back = filter_d - 1 - fpad_back
+ bpad_back = filter_d - 1 - fpad_back + opad_d
bpad_top = filter_h - 1 - fpad_top
- bpad_bottom = filter_h - 1 - fpad_bottom
+ bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
bpad_left = filter_w - 1 - fpad_left
- bpad_right = filter_w - 1 - fpad_right
+ bpad_right = filter_w - 1 - fpad_right + opad_w
data_pad = pad(data_dilate, \
[0, 0, bpad_front, bpad_top, bpad_left], \
[0, 0, bpad_back, bpad_bottom, bpad_right], \
return data_pad, kernel_transform
-def declaration_conv3d_transpose_impl(data, kernel, strides, padding, out_dtype):
+def declaration_conv3d_transpose_impl(data, kernel, strides, padding, out_dtype, output_padding):
"""Implementation of conv3d transpose"""
data_pad, kernel_transform = \
- conv3d_transpose_ncdhw_preprocess(data, kernel, strides, padding, out_dtype)
+ conv3d_transpose_ncdhw_preprocess(data, kernel, strides, padding, out_dtype, output_padding)
batch, in_c, in_d, in_h, in_w = data_pad.shape
out_c, _, filter_d, filter_h, filter_w = kernel_transform.shape
stride_d, stride_h, stride_w = strides
"gpu": (topi.cuda.conv3d_transpose_ncdhw, topi.cuda.schedule_conv3d_transpose_ncdhw),
}
-def verify_conv3d_transpose_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride, padding):
+def verify_conv3d_transpose_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride, padding, output_padding):
in_depth, in_height, in_width = in_size
kernel_depth, kernel_height, kernel_width = kernel
stride_depth, stride_height, stride_width = stride
def get_ref_data():
a_np = np.random.uniform(size=a_shape).astype(dtype)
w_np = np.random.uniform(size=w_shape).astype(dtype)
- b_np = tvm.topi.testing.conv3d_transpose_ncdhw_python(a_np, w_np, stride, padding)
+ b_np = tvm.topi.testing.conv3d_transpose_ncdhw_python(a_np, w_np, stride, padding, output_padding)
c_np = np.maximum(b_np, 0)
return a_np, w_np, b_np, c_np
B = fcompute(A, W,
[stride_depth, stride_height, stride_width],
[pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right],
- A.dtype)
+ A.dtype, output_padding)
C = topi.nn.relu(B)
s1 = fschedule([B])
s2 = fschedule([C])
def test_conv3d_transpose_ncdhw():
    """Run conv3d_transpose_ncdhw verification over representative shapes.

    Covers unit and non-unit kernels/strides, symmetric padding, and several
    output_padding combinations (each component kept strictly below the
    corresponding stride, as the implementations assert).
    """
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 1, (1, 1, 1), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 2, (3, 3, 3), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 16, (3, 3, 3), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 16, (3, 3, 3), (3, 3, 3), (0, 0, 0, 0, 0, 0), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 16, (3, 3, 3), (3, 3, 3), (0, 0, 0, 0, 0, 0), (2, 2, 2))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 16, (3, 3, 3), (3, 3, 3), (0, 0, 0, 0, 0, 0), (1, 0, 2))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 16, (3, 3, 3), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 16, (3, 3, 3), (2, 2, 2), (1, 1, 1, 1, 1, 1), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 16, (2, 2, 2), (2, 2, 2), (0, 0, 0, 0, 0, 0), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 8, (32, 32, 32), 32, (5, 5, 5), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 8, (32, 32, 32), 64, (5, 5, 5), (2, 2, 2), (1, 1, 1, 1, 1, 1), (0, 0, 0))
    verify_conv3d_transpose_ncdhw(1, 8, (32, 32, 32), 64, (5, 5, 5), (2, 2, 2), (1, 1, 1, 1, 1, 1), (1, 1, 1))


if __name__ == "__main__":
    test_conv3d_transpose_ncdhw()