self.cuda_target_arch = None
self.in_tuning = False
+ self.silent = False
GLOBAL_SCOPE = AutotvmGlobalScope()
import numpy as np
from .space import FallbackConfigEntity
+from .. import env as _env
logger = logging.getLogger('autotvm')
specific dispatch mechanism for templates.
"""
current = None
+    # a set to prevent printing duplicated messages
+ warning_messages = set()
def __init__(self):
self._old_ctx = DispatchContext.current
def __init__(self):
super(FallbackContext, self).__init__()
self.memory = {}
- self.silent = False
-
- # a set to prevent print duplicated message
- self.messages = set()
def _query_inside(self, target, workload):
key = (str(target), workload)
if key in self.memory:
return self.memory[key]
- if not self.silent:
+ if not _env.GLOBAL_SCOPE.silent:
msg = "Cannot find config for target=%s, workload=%s. A fallback configuration "\
"is used, which may bring great performance regression." % (target, workload)
- if msg not in self.messages:
- self.messages.add(msg)
+ if msg not in DispatchContext.warning_messages:
+ DispatchContext.warning_messages.add(msg)
logger.warning(msg)
cfg = FallbackConfigEntity()
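For reference, a minimal usage sketch of the new global flag (assuming the standard `tvm.autotvm` import path); the try/finally wrapper is only an illustration of restoring the flag, not part of this change:

from tvm import autotvm

# Silence "Cannot find config ..." fallback warnings globally
autotvm.GLOBAL_SCOPE.silent = True
try:
    # ... compile or run code that may hit fallback configs ...
    pass
finally:
    # Re-enable the warnings afterwards
    autotvm.GLOBAL_SCOPE.silent = False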
from . import _backend
logger = logging.getLogger('compile_engine')
-
+autotvm_logger = logging.getLogger('autotvm')
@tvm._ffi.register_object("relay.LoweredOutput")
class LoweredOutput(Object):
return best_plevel_impl, outs
outputs = {}
+ workloads = {}
best_autotvm_impl = None
best_cfg = None
dispatch_ctx = autotvm.task.DispatchContext.current
+ autotvm.GLOBAL_SCOPE.silent = True
for impl in all_impls:
outs = impl.compute(attrs, inputs, out_type)
outputs[impl] = outs
workload = autotvm.task.get_workload(outs)
+ workloads[impl] = workload
if workload is None:
+            # Not an AutoTVM-tunable implementation
continue
cfg = dispatch_ctx.query(target, workload)
if cfg.is_fallback:
- # It's a fallback config
+ # Skip fallback config
continue
if best_cfg is None or best_cfg.cost > cfg.cost:
best_autotvm_impl = impl
best_cfg = cfg
+ autotvm.GLOBAL_SCOPE.silent = False
if best_autotvm_impl:
+        # The best AutoTVM implementation is guaranteed not to use a fallback config
return best_autotvm_impl, outputs[best_autotvm_impl]
+    # Use the implementation with the highest plevel
+ if workloads[best_plevel_impl] is not None:
+ msg = "Cannot find config for target=%s, workload=%s. A fallback configuration "\
+ "is used, which may bring great performance regression." \
+ % (target, workloads[best_plevel_impl])
+ if msg not in autotvm.task.DispatchContext.warning_messages:
+ autotvm.task.DispatchContext.warning_messages.add(msg)
+ autotvm_logger.warning(msg)
return best_plevel_impl, outputs[best_plevel_impl]
skip("nnpack is not available")
devices = ['llvm -device=arm_cpu']
- autotvm.DispatchContext.current.silent = True
+ autotvm.GLOBAL_SCOPE.silent = True
with WinogradFallback():
# resnet 18 workloads
verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1, devices=devices)
        # weird workloads
verify_conv2d_nchw(1, 3, 3, 3, 3, 1, 1, devices=devices)
verify_conv2d_nchw(1, 13, 71, 59, 3, 1, 1, devices=devices)
+ autotvm.GLOBAL_SCOPE.silent = False
if __name__ == "__main__":
import pytest
- pytest.main()
+ pytest.main([__file__])
break
ic_block = 8
- autotvm.DispatchContext.current.silent = True
+ autotvm.GLOBAL_SCOPE.silent = True
A = te.placeholder((batch, in_channel//ic_block, in_height, in_width, ic_block), name='A', dtype='uint8')
W = te.placeholder((num_filter//oc_block, in_channel//ic_block//groups, kernel, kernel, ic_block//4, oc_block, 4), name='W', dtype='int8')
for device in ["llvm -mcpu=skylake-avx512"]:
with autotvm.tophub.context(device): # load tophub pre-tuned parameters
check_device(device)
+ autotvm.GLOBAL_SCOPE.silent = False
@pytest.mark.skip
def test_conv2d_NCHWc():