return nn.conv2d_nchw(data, kernel, strides, padding, dilation, out_dtype)
if layout == 'HWCN':
return nn.conv2d_hwcn(data, kernel, strides, padding, dilation, out_dtype)
+ if layout == 'NHWC':
+ return nn.conv2d_nhwc(data, kernel, strides, padding, dilation, out_dtype)
raise ValueError("not support this layout {} yet".format(layout))
traverse_inline(s, outs[0].op, _callback)
return s
+
+
+@autotvm.register_topi_schedule(generic.schedule_conv2d_nhwc, ["cuda", "gpu"],
+                                ["direct"])
+def schedule_conv2d_nhwc_cuda(cfg, outs):
+    """Create the CUDA schedule for a conv2d computation in NHWC layout.
+
+    Parameters
+    ----------
+    cfg: ConfigEntity
+        The config for this template
+
+    outs: Array of Tensor
+        The computation graph description of conv2d
+        in the format of an array of tensors.
+
+    Returns
+    -------
+    s: Schedule
+        The computation schedule for conv2d.
+    """
+    # When 'cudnn' is listed in the current target's libs, hand off to the
+    # generic extern schedule instead of building a direct schedule here.
+    if 'cudnn' in tvm.target.current_target().libs:
+        return generic.schedule_extern(outs)
+
+    # A single Tensor is accepted as well as a list of tensors.
+    if isinstance(outs, tvm.tensor.Tensor):
+        outs = [outs]
+    s = tvm.create_schedule([out.op for out in outs])
+
+    def _schedule_conv(op):
+        # Only ops tagged 'conv2d_nhwc' receive the direct CUDA schedule.
+        if op.tag == 'conv2d_nhwc':
+            schedule_direct_cuda(cfg, s, op.output(0))
+
+    traverse_inline(s, outs[0].op, _schedule_conv)
+    return s
A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A')
W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
- B = topi.nn.conv2d_nhwc(A, W, stride, padding, dilation)
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
return
print("Running on target: %s" % device)
with tvm.target.create(device):
+ B = topi.nn.conv2d(A, W, (stride, stride), padding,
+ (dilation, dilation), layout='NHWC', out_dtype=dtype)
s = topi.generic.schedule_conv2d_nhwc([B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
func(a, w, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
- for device in ['llvm']:
+ for device in ['llvm', 'cuda']:
check_device(device)