From 512ed3930a61daf38e80e1f71e51f0d1f139fb8e Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 1 Jul 2020 22:59:21 -0700
Subject: [PATCH] [TOPI] Fix x86 conv2d template when tuning with unpacked
 layout (#5938)

* fix x86 conv2d and conv2d_transpose template

* address comments
---
 topi/python/topi/x86/conv2d_avx_1x1.py    |  2 +-
 topi/python/topi/x86/conv2d_avx_common.py |  2 +-
 topi/python/topi/x86/conv2d_transpose.py  | 14 ++++++++------
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py
index 978c4b9..c6ed832 100644
--- a/topi/python/topi/x86/conv2d_avx_1x1.py
+++ b/topi/python/topi/x86/conv2d_avx_1x1.py
@@ -73,6 +73,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         s[data_vec].parallel(parallel_axis)
         data_vec = data_vec.op.input_tensors[0]
 
+    oc_bn = cfg["tile_oc"].size[-1]
     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
             kernel_vec.name == 'kernel_vec':
         # data and kernel are not pre-computed, schedule layout transform here.
@@ -84,7 +85,6 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
 
         oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[kernel_vec].op.axis
         s[kernel_vec].reorder(oc_chunk, oh, ic_chunk, ow, ic_block, oc_block)
-        oc_bn = cfg["tile_oc"].size[-1]
         if oc_bn > 1:
             s[kernel_vec].vectorize(oc_block)
         parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py
index a88d168..aea954f 100644
--- a/topi/python/topi/x86/conv2d_avx_common.py
+++ b/topi/python/topi/x86/conv2d_avx_common.py
@@ -95,6 +95,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         s[data_vec].parallel(parallel_axis)
         data_vec = data_vec.op.input_tensors[0]
 
+    oc_bn = cfg["tile_oc"].size[-1]
     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
             kernel_vec.name == 'kernel_vec':
         # data and kernel are not pre-computed, schedule layout transform here.
@@ -106,7 +107,6 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
 
         oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[kernel_vec].op.axis
         s[kernel_vec].reorder(oc_chunk, oh, ic_chunk, ow, ic_block, oc_block)
-        oc_bn = cfg["tile_oc"].size[-1]
         if oc_bn > 1:
             s[kernel_vec].vectorize(oc_block)
         parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
diff --git a/topi/python/topi/x86/conv2d_transpose.py b/topi/python/topi/x86/conv2d_transpose.py
index d490b28..7ec2817 100644
--- a/topi/python/topi/x86/conv2d_transpose.py
+++ b/topi/python/topi/x86/conv2d_transpose.py
@@ -40,14 +40,16 @@ def schedule_conv2d_transpose_nchw(outs):
             conv_out = op.input_tensors[0]
             # retrieve data
             data_vec = conv_out.op.input_tensors[0]
-            data_pad = data_vec.op.input_tensors[0]
-            data_dilate = data_pad.op.input_tensors[0]
-            s[data_dilate].compute_inline()
-            s[data_pad].compute_inline()
+            if isinstance(data_vec, te.ComputeOp):
+                data_pad = data_vec.op.input_tensors[0]
+                data_dilate = data_pad.op.input_tensors[0]
+                s[data_dilate].compute_inline()
+                s[data_pad].compute_inline()
             # retrieve kernel
             kernel_vec = conv_out.op.input_tensors[1]
-            kernel_transform = kernel_vec.op.input_tensors[0]
-            s[kernel_transform].compute_inline()
+            if isinstance(kernel_vec, te.ComputeOp):
+                kernel_transform = kernel_vec.op.input_tensors[0]
+                s[kernel_transform].compute_inline()
 
     traverse_inline(s, outs[0].op, _callback)
     return s
-- 
2.7.4
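Context for the change (not part of the patch itself): the two conv2d hunks hoist the `oc_bn = cfg["tile_oc"].size[-1]` lookup out of the `kernel_vec` branch so it is bound regardless of whether the kernel layout transform was pre-computed, and the conv2d_transpose hunk guards the `compute_inline` calls so the schedule no longer assumes the dilate/pad and kernel-transform stages exist, which is the situation when the template is tuned with an unpacked layout. A minimal standalone sketch of that guard pattern, with hypothetical tensors A/B/C in place of the real conv2d graph (the sketch tests the tensor's producing op; it is an illustration, not the patched TOPI code):

    import tvm
    from tvm import te

    def inline_if_computed(s, tensor):
        # compute_inline() is only valid for stages backed by a te.compute();
        # raw inputs are produced by a PlaceholderOp and must be skipped.
        if isinstance(tensor.op, te.ComputeOp):
            s[tensor].compute_inline()

    A = te.placeholder((16, 16), name="A")                          # raw input
    B = te.compute((16, 16), lambda i, j: A[i, j] + 1.0, name="B")  # intermediate
    C = te.compute((16, 16), lambda i, j: B[i, j] * 2.0, name="C")  # output

    s = te.create_schedule(C.op)
    inline_if_computed(s, A)  # no-op: A.op is a PlaceholderOp
    inline_if_computed(s, B)  # B is folded into C
    print(tvm.lower(s, [A, C], simple_mode=True))

Without the guard, calling compute_inline on a placeholder-backed stage raises an error, which is the failure mode the patch removes from the unpacked-layout tuning path.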