[TESTS] Refactor tests to run on either the GPU or CPU. (#6331)
author	Tristan Konolige <tristan.konolige@gmail.com>
Wed, 2 Sep 2020 21:50:59 +0000 (14:50 -0700)
committer	GitHub <noreply@github.com>
Wed, 2 Sep 2020 21:50:59 +0000 (14:50 -0700)
Much of the time spent in testing is duplicated work between CPU and GPU
test nodes. The main reason is that there is no way to control which
TVM devices are enabled at runtime, so tests that use LLVM will run on
both GPU and CPU nodes.

This patch adds an environment variable, TVM_TEST_TARGETS, which
controls which TVM devices should be used by tests. Devices not listed
in TVM_TEST_TARGETS can still be used, so tests must be careful to
check that the desired device is enabled with
`tvm.testing.device_enabled` or to enumerate all enabled devices with
`tvm.testing.enabled_targets`. All tests have been retrofitted with
these checks.

This patch also provides the decorator `@tvm.testing.uses_gpu` to mark a
test as possibly using the gpu. Tests that require the gpu can use
`@tvm.testing.requires_gpu`. Tests without these markers will not be run
on GPU nodes.
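
As a rough illustration (an editor's sketch, not part of the patch; the test
names are made up), a test retrofitted with the helpers added in
python/tvm/testing.py now looks roughly like this:

    import tvm
    import tvm.testing

    # uses_gpu adds the pytest `gpu` marker, so the test is also collected on
    # GPU test nodes; enabled_targets() returns only targets that are both
    # requested via TVM_TEST_TARGETS and supported by this TVM build.
    @tvm.testing.uses_gpu
    def test_something():
        for target, ctx in tvm.testing.enabled_targets():
            ...  # build and run the schedule on (target, ctx)

    # Optional devices are checked explicitly instead of tvm.runtime.enabled:
    def test_optional_cuda_path():
        if not tvm.testing.device_enabled("cuda"):
            return
        ...  # cuda-specific checks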

154 files changed:
apps/extension/tests/test_ext.py
conftest.py [new file with mode: 0644]
docs/contribute/code_guide.rst
pytest.ini [moved from python/tvm/relay/testing/config.py with 62% similarity]
python/tvm/relay/testing/__init__.py
python/tvm/testing.py
tests/lint/check_file_type.py
tests/python/contrib/test_cblas.py
tests/python/contrib/test_cublas.py
tests/python/contrib/test_cudnn.py
tests/python/contrib/test_gemm_acc32_vnni.py
tests/python/contrib/test_miopen.py
tests/python/contrib/test_mps.py
tests/python/contrib/test_nnpack.py
tests/python/contrib/test_random.py
tests/python/contrib/test_rocblas.py
tests/python/frontend/caffe2/test_forward.py
tests/python/frontend/coreml/test_forward.py
tests/python/frontend/keras/test_forward.py
tests/python/frontend/mxnet/test_forward.py
tests/python/frontend/onnx/test_forward.py
tests/python/frontend/pytorch/test_forward.py
tests/python/frontend/tensorflow/test_bn_dynamic.py
tests/python/frontend/tensorflow/test_forward.py
tests/python/frontend/tflite/test_forward.py
tests/python/integration/test_dot.py
tests/python/integration/test_ewise.py
tests/python/integration/test_ewise_fpga.py
tests/python/integration/test_gemm.py
tests/python/integration/test_reduce.py
tests/python/integration/test_scan.py
tests/python/integration/test_tuning.py
tests/python/integration/test_winograd_nnpack.py
tests/python/nightly/quantization/test_quantization_accuracy.py
tests/python/relay/dyn/test_dynamic_op_level10.py
tests/python/relay/dyn/test_dynamic_op_level2.py
tests/python/relay/dyn/test_dynamic_op_level3.py
tests/python/relay/dyn/test_dynamic_op_level5.py
tests/python/relay/dyn/test_dynamic_op_level6.py
tests/python/relay/test_backend_compile_engine.py
tests/python/relay/test_backend_graph_runtime.py
tests/python/relay/test_backend_interpreter.py
tests/python/relay/test_cpp_build_module.py
tests/python/relay/test_op_grad_level1.py
tests/python/relay/test_op_grad_level2.py
tests/python/relay/test_op_grad_level3.py
tests/python/relay/test_op_level1.py
tests/python/relay/test_op_level10.py
tests/python/relay/test_op_level2.py
tests/python/relay/test_op_level3.py
tests/python/relay/test_op_level4.py
tests/python/relay/test_op_level5.py
tests/python/relay/test_op_level6.py
tests/python/relay/test_pass_alter_op_layout.py
tests/python/relay/test_pass_annotation.py
tests/python/relay/test_pass_dynamic_to_static.py
tests/python/relay/test_pass_fuse_ops.py
tests/python/relay/test_pass_lazy_gradient_init.py
tests/python/relay/test_pass_manager.py
tests/python/relay/test_vm.py
tests/python/topi/python/common.py
tests/python/topi/python/test_fifo_buffer.py
tests/python/topi/python/test_topi_batch_matmul.py
tests/python/topi/python/test_topi_broadcast.py
tests/python/topi/python/test_topi_clip.py
tests/python/topi/python/test_topi_conv1d.py
tests/python/topi/python/test_topi_conv1d_transpose_ncw.py
tests/python/topi/python/test_topi_conv2d_NCHWc.py
tests/python/topi/python/test_topi_conv2d_hwcn.py
tests/python/topi/python/test_topi_conv2d_hwnc_tensorcore.py
tests/python/topi/python/test_topi_conv2d_int8.py
tests/python/topi/python/test_topi_conv2d_nchw.py
tests/python/topi/python/test_topi_conv2d_nhwc.py
tests/python/topi/python/test_topi_conv2d_nhwc_pack_int8.py
tests/python/topi/python/test_topi_conv2d_nhwc_tensorcore.py
tests/python/topi/python/test_topi_conv2d_nhwc_winograd.py
tests/python/topi/python/test_topi_conv2d_transpose_nchw.py
tests/python/topi/python/test_topi_conv2d_winograd.py
tests/python/topi/python/test_topi_conv3d_ncdhw.py
tests/python/topi/python/test_topi_conv3d_ndhwc.py
tests/python/topi/python/test_topi_conv3d_ndhwc_tensorcore.py
tests/python/topi/python/test_topi_conv3d_transpose_ncdhw.py
tests/python/topi/python/test_topi_conv3d_winograd.py
tests/python/topi/python/test_topi_correlation.py
tests/python/topi/python/test_topi_deformable_conv2d.py
tests/python/topi/python/test_topi_dense.py
tests/python/topi/python/test_topi_dense_tensorcore.py
tests/python/topi/python/test_topi_depth_to_space.py
tests/python/topi/python/test_topi_depthwise_conv2d.py
tests/python/topi/python/test_topi_depthwise_conv2d_back_input.py
tests/python/topi/python/test_topi_depthwise_conv2d_back_weight.py
tests/python/topi/python/test_topi_group_conv2d.py
tests/python/topi/python/test_topi_group_conv2d_NCHWc_int8.py
tests/python/topi/python/test_topi_image.py
tests/python/topi/python/test_topi_lrn.py
tests/python/topi/python/test_topi_math.py
tests/python/topi/python/test_topi_pooling.py
tests/python/topi/python/test_topi_reduce.py
tests/python/topi/python/test_topi_relu.py
tests/python/topi/python/test_topi_reorg.py
tests/python/topi/python/test_topi_softmax.py
tests/python/topi/python/test_topi_sort.py
tests/python/topi/python/test_topi_space_to_depth.py
tests/python/topi/python/test_topi_sparse.py
tests/python/topi/python/test_topi_tensor.py
tests/python/topi/python/test_topi_transform.py
tests/python/topi/python/test_topi_upsampling.py
tests/python/topi/python/test_topi_util.py
tests/python/topi/python/test_topi_vision.py
tests/python/unittest/test_auto_scheduler_measure.py
tests/python/unittest/test_auto_scheduler_search_policy.py
tests/python/unittest/test_auto_scheduler_sketch_generation.py
tests/python/unittest/test_autotvm_index_tuner.py
tests/python/unittest/test_hybrid_error_report.py
tests/python/unittest/test_runtime_graph.py
tests/python/unittest/test_runtime_graph_debug.py
tests/python/unittest/test_runtime_module_based_interface.py
tests/python/unittest/test_runtime_module_export.py
tests/python/unittest/test_runtime_module_load.py
tests/python/unittest/test_runtime_ndarray.py
tests/python/unittest/test_runtime_rpc.py
tests/python/unittest/test_target_codegen_blob.py
tests/python/unittest/test_target_codegen_bool.py
tests/python/unittest/test_target_codegen_cross_llvm.py
tests/python/unittest/test_target_codegen_cuda.py
tests/python/unittest/test_target_codegen_device.py
tests/python/unittest/test_target_codegen_extern.py
tests/python/unittest/test_target_codegen_llvm.py
tests/python/unittest/test_target_codegen_opencl.py
tests/python/unittest/test_target_codegen_rocm.py
tests/python/unittest/test_target_codegen_vm_basic.py
tests/python/unittest/test_target_codegen_vulkan.py
tests/python/unittest/test_te_autodiff.py
tests/python/unittest/test_te_hybrid_script.py
tests/python/unittest/test_te_schedule_postproc_rewrite_for_tensor_core.py
tests/python/unittest/test_te_schedule_tensor_core.py
tests/python/unittest/test_te_tensor_overload.py
tests/python/unittest/test_testing.py
tests/python/unittest/test_tir_analysis_verify_gpu_code.py
tests/python/unittest/test_tir_analysis_verify_memory.py
tests/python/unittest/test_tir_buffer.py
tests/python/unittest/test_tir_ir_builder.py
tests/python/unittest/test_tir_transform_hoist_if.py
tests/python/unittest/test_tir_transform_instrument_bound_checkers.py
tests/python/unittest/test_tir_transform_lower_intrin.py
tests/python/unittest/test_tir_transform_lower_warp_memory.py
tests/python/unittest/test_tir_transform_thread_sync.py
tests/scripts/setup-pytest-env.sh
tests/scripts/task_python_frontend.sh
tests/scripts/task_python_frontend_cpu.sh
tests/scripts/task_python_integration.sh
tests/scripts/task_python_integration_gpuonly.sh
tests/scripts/task_python_unittest_gpuonly.sh
tutorials/frontend/deploy_ssd_gluoncv.py

index f7e17d2..defac94 100644 (file)
@@ -17,6 +17,7 @@
 import tvm_ext
 import tvm
 import tvm._ffi.registry
+import tvm.testing
 from tvm import te
 import numpy as np
 
@@ -32,7 +33,7 @@ def test_ext_dev():
     B = te.compute((n,), lambda *i: A(*i) + 1.0, name='B')
     s = te.create_schedule(B.op)
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             return
         f = tvm.build(s, [A, B], "ext_dev", "llvm")
         ctx = tvm.ext_dev(0)
@@ -77,7 +78,7 @@ def test_extern_call():
     s = te.create_schedule(B.op)
 
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             return
         f = tvm.build(s, [A, B], "llvm")
         ctx = tvm.cpu(0)
diff --git a/conftest.py b/conftest.py
new file mode 100644 (file)
index 0000000..edf1a73
--- /dev/null
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import tvm.testing
+from pytest import ExitCode
+
+def pytest_configure(config):
+    print("enabled targets:", "; ".join(map(lambda x: x[0], tvm.testing.enabled_targets())))
+    print("pytest marker:", config.option.markexpr)
+
+def pytest_sessionfinish(session, exitstatus):
+    # Don't exit with an error if we select a subset of tests that doesn't
+    # include anything
+    if session.config.option.markexpr != '':
+        if exitstatus == ExitCode.NO_TESTS_COLLECTED:
+            session.exitstatus = ExitCode.OK
index c932e93..c0b022b 100644 (file)
@@ -82,6 +82,20 @@ Python Code Styles
 - Stick to language features as in ``python 3.5``
 
 
+Writing Python Tests
+--------------------
+We use `pytest <https://docs.pytest.org/en/stable/>`_ for all python testing. ``tests/python`` contains all the tests.
+
+If you want your test to run over a variety of targets, use the :py:func:`tvm.testing.parametrize_targets` decorator. For example:
+
+.. code:: python
+
+  @tvm.testing.parametrize_targets
+  def test_mytest(target, ctx):
+    ...
+
+will run `test_mytest` with `target="llvm"`, `target="cuda"`, and a few others. This also ensures that your test is run on the correct hardware by the CI. If you only want to test against a couple of targets, use `@tvm.testing.parametrize_targets("target_1", "target_2")`. If you want to test on a single target, use the associated decorator from :py:mod:`tvm.testing`. For example, CUDA tests use the `@tvm.testing.requires_cuda` decorator.
+
 Handle Integer Constant Expression
 ----------------------------------
 We often need to handle constant integer expressions in TVM. Before we do so, the first question we want to ask is that is it really necessary to get a constant integer. If symbolic expression also works and let the logic flow, we should use symbolic expression as much as possible. So the generated code works for shapes that are not known ahead of time.
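
To illustrate the two forms described in the documentation added above, here is
a short sketch (an editor's illustration, not part of the patch; the test names
are made up) of a test parametrized over an explicit pair of targets and of a
single-target CUDA test:

    import tvm
    import tvm.testing

    # Parametrize over an explicit subset of targets; each parametrization is
    # skipped automatically when its runtime is not enabled in this TVM build.
    @tvm.testing.parametrize_targets("llvm", "cuda")
    def test_mytest(target, ctx):
        ...  # build with `target` and run on `ctx`

    # Single-target test: requires_cuda also applies the `gpu` marker, so the
    # test only runs where CUDA is available.
    @tvm.testing.requires_cuda
    def test_cuda_only():
        ...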
similarity index 62%
rename from python/tvm/relay/testing/config.py
rename to pytest.ini
index 93a08db..675f8fe 100644 (file)
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-"""Configuration about tests"""
-from __future__ import absolute_import as _abs
-
-import os
-import tvm
-
-
-def ctx_list():
-    """Get context list for testcases"""
-    device_list = os.environ.get("RELAY_TEST_TARGETS", "")
-    device_list = (device_list.split(",") if device_list
-                   else ["llvm", "cuda"])
-    device_list = set(device_list)
-    res = [(device, tvm.context(device, 0)) for device in device_list]
-    return [x for x in res if x[1].exist]
+[pytest]
+markers =
+    gpu: mark a test as requiring a gpu
+    tensorcore: mark a test as requiring a tensorcore
+    cuda: mark a test as requiring cuda
+    opencl: mark a test as requiring opencl
+    rocm: mark a test as requiring rocm
+    vulkan: mark a test as requiring vulkan
+    metal: mark a test as requiring metal
+    llvm: mark a test as requiring llvm
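
The `gpu` marker above is the one the docstring in python/tvm/testing.py
suggests selecting with `pytest -m gpu`. As a rough sketch (a hypothetical
local invocation, not part of the patch), the same selection can be done
programmatically:

    import pytest

    # Equivalent to `pytest -m gpu tests/python/unittest` from the repository
    # root: collect only tests carrying the `gpu` marker.
    pytest.main(["-m", "gpu", "tests/python/unittest"])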
index 904e4d7..534015f 100644 (file)
@@ -25,6 +25,7 @@ from tvm import te
 import tvm.relay as relay
 import tvm.relay.op as op
 from tvm.relay import Prelude
+from tvm.testing import enabled_targets
 
 from . import mlp
 from . import resnet
@@ -41,7 +42,6 @@ from . import yolo_detection
 from . import temp_op_attr
 from . import synthetic
 
-from .config import ctx_list
 from .init import create_workload
 from .nat import add_nat_definitions, count, make_nat_value, make_nat_expr
 from .py_converter import to_python, run_as_python
@@ -125,7 +125,7 @@ def check_grad(func,
     if test_inputs is None:
         test_inputs = inputs
 
-    for target, ctx in ctx_list():
+    for target, ctx in enabled_targets():
         intrp = relay.create_executor(ctx=ctx, target=target)
 
         # Get analytic gradients.
index 7483a9f..0a568b0 100644 (file)
 # under the License.
 
 # pylint: disable=invalid-name,unnecessary-comprehension
-""" TVM testing utilities """
+""" TVM testing utilities
+
+Testing Markers
+***************
+
+We use pytest markers to specify the requirements of test functions. Currently
+there is a single distinction that matters for our testing environment: does
+the test require a gpu. Tests that require a gpu are marked with the decorator
+:py:func:`requires_gpu`, which enables the test only when a gpu is available.
+To avoid running tests that don't require a gpu on gpu nodes, this decorator
+also sets the pytest marker `gpu`, so we can select the gpu subset of tests
+(using `pytest -m gpu`).
+
+Unfortunately, many tests are written like this:
+
+.. code:: python
+
+    def test_something():
+        for target in all_targets():
+            do_something()
+
+The test uses both gpu and cpu targets, so the test needs to be run on both cpu
+and gpu nodes. But we still want to only run the cpu targets on the cpu testing
+node. The solution is to mark these tests with the gpu marker so they will be
+run on the gpu nodes. But we also modify all_targets (renamed to
+enabled_targets) so that it only returns gpu targets on gpu nodes and cpu
+targets on cpu nodes (using an environment variable).
+
+Instead of using the all_targets function, future tests that would like to
+test against a variety of targets should use the
+:py:func:`tvm.testing.parametrize_targets` functionality. This allows us
+greater control over which targets are run on which testing nodes.
+
+If in the future we want to add a new type of testing node (for example
+FPGAs), we need to add a new marker in `pytest.ini` and a new
+function in this module. Then targets using this node should be added to the
+`TVM_TEST_TARGETS` environment variable in the CI.
+"""
 import logging
+import os
+import pytest
 import numpy as np
 import tvm
 import tvm.arith
 import tvm.tir
 import tvm.te
 import tvm._ffi
+from tvm.contrib import nvcc
 
 
 def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7):
@@ -285,4 +325,345 @@ def check_int_constraints_trans_consistency(constraints_trans, vranges=None):
                    constraints_trans.dst_to_src, constraints_trans.src_to_dst)
 
 
+def _get_targets():
+    target_str = os.environ.get("TVM_TEST_TARGETS", "")
+    if len(target_str) == 0:
+        target_str = DEFAULT_TEST_TARGETS
+    targets = {
+        dev
+        for dev in target_str.split(";")
+        if len(dev) > 0 and tvm.context(dev, 0).exist and tvm.runtime.enabled(dev)
+    }
+    if len(targets) == 0:
+        logging.warning(
+            "None of the following targets are supported by this build of TVM: %s."
+            " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.",
+            target_str,
+        )
+        return {"llvm"}
+    return targets
+
+
+DEFAULT_TEST_TARGETS = (
+    "llvm;cuda;opencl;metal;rocm;vulkan;nvptx;"
+    "llvm -device=arm_cpu;opencl -device=mali,aocl_sw_emu"
+)
+
+
+def device_enabled(target):
+    """Check if a target should be used when testing.
+
+    It is recommended that you use :py:func:`tvm.testing.parametrize_targets`
+    instead of manually checking if a target is enabled.
+
+    This allows the user to control which devices they are testing against. In
+    tests, this should be used to check if a device should be used when said
+    device is an optional part of the test.
+
+    Parameters
+    ----------
+    target : str
+        Target string to check against
+
+    Returns
+    -------
+    bool
+        Whether or not the device associated with this target is enabled.
+
+    Example
+    -------
+    >>> @tvm.testing.uses_gpu
+    >>> def test_mytest():
+    >>>     for target in ["cuda", "llvm"]:
+    >>>         if device_enabled(target):
+    >>>             test_body...
+
+    Here, `test_body` will only be reached with `target="cuda"` on gpu test
+    nodes and with `target="llvm"` on cpu test nodes.
+    """
+    assert isinstance(target, str), "device_enabled requires a target as a string"
+    target_kind = target.split(" ")[
+        0
+    ]  # only check the device name; sometimes there are extra flags
+    return any([target_kind in test_target for test_target in _get_targets()])
+
+
+def enabled_targets():
+    """Get all enabled targets with associated contexts.
+
+    In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of
+    this function.
+
+    In this context, enabled means that TVM was built with support for this
+    target and the target name appears in the TVM_TEST_TARGETS environment
+    variable. If TVM_TEST_TARGETS is not set, it defaults to the variable
+    DEFAULT_TEST_TARGETS in this module.
+
+    If you use this function in a test, you **must** decorate the test with
+    :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu).
+
+    Returns
+    -------
+    targets: list
+        A list of (target, context) pairs, one for each enabled target
+    """
+    return [(tgt, tvm.context(tgt)) for tgt in _get_targets()]
+
+
+def _compose(args, decs):
+    """Helper to apply multiple markers
+    """
+    if len(args) > 0:
+        f = args[0]
+        for d in reversed(decs):
+            f = d(f)
+        return f
+    return decs
+
+
+def uses_gpu(*args):
+    """Mark to differentiate tests that use the GPU is some capacity.
+
+    These tests will be run on CPU-only test nodes and on test nodes with GPUS.
+    To mark a test that must have a GPU present to run, use
+    :py:func:`tvm.testing.requires_gpu`.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _uses_gpu = [pytest.mark.gpu]
+    return _compose(args, _uses_gpu)
+
+
+def requires_gpu(*args):
+    """Mark a test as requiring a GPU to run.
+
+    Tests with this mark will not be run unless a gpu is present.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_gpu = [
+        pytest.mark.skipif(not tvm.gpu().exist, reason="No GPU present"),
+        *uses_gpu(),
+    ]
+    return _compose(args, _requires_gpu)
+
+
+def requires_cuda(*args):
+    """Mark a test as requiring the CUDA runtime.
+
+    This also marks the test as requiring a gpu.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_cuda = [
+        pytest.mark.cuda,
+        pytest.mark.skipif(
+            not device_enabled("cuda"), reason="CUDA support not enabled"
+        ),
+        *requires_gpu(),
+    ]
+    return _compose(args, _requires_cuda)
+
+
+def requires_opencl(*args):
+    """Mark a test as requiring the OpenCL runtime.
+
+    This also marks the test as requiring a gpu.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_opencl = [
+        pytest.mark.opencl,
+        pytest.mark.skipif(
+            not device_enabled("opencl"), reason="OpenCL support not enabled"
+        ),
+        *requires_gpu(),
+    ]
+    return _compose(args, _requires_opencl)
+
+
+def requires_rocm(*args):
+    """Mark a test as requiring the rocm runtime.
+
+    This also marks the test as requiring a gpu.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_rocm = [
+        pytest.mark.rocm,
+        pytest.mark.skipif(
+            not device_enabled("rocm"), reason="rocm support not enabled"
+        ),
+        *requires_gpu(),
+    ]
+    return _compose(args, _requires_rocm)
+
+
+def requires_metal(*args):
+    """Mark a test as requiring the metal runtime.
+
+    This also marks the test as requiring a gpu.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_metal = [
+        pytest.mark.metal,
+        pytest.mark.skipif(
+            not device_enabled("metal"), reason="metal support not enabled"
+        ),
+        *requires_gpu(),
+    ]
+    return _compose(args, _requires_metal)
+
+
+def requires_vulkan(*args):
+    """Mark a test as requiring the vulkan runtime.
+
+    This also marks the test as requiring a gpu.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_vulkan = [
+        pytest.mark.vulkan,
+        pytest.mark.skipif(
+            not device_enabled("vulkan"), reason="vulkan support not enabled"
+        ),
+        *requires_gpu(),
+    ]
+    return _compose(args, _requires_vulkan)
+
+
+def requires_tensorcore(*args):
+    """Mark a test as requiring a tensorcore to run.
+
+    Tests with this mark will not be run unless a tensorcore is present.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_tensorcore = [
+        pytest.mark.tensorcore,
+        pytest.mark.skipif(
+            not tvm.gpu().exist or not nvcc.have_tensorcore(tvm.gpu(0).compute_version),
+            reason="No tensorcore present",
+        ),
+        *requires_gpu(),
+    ]
+    return _compose(args, _requires_tensorcore)
+
+
+def requires_llvm(*args):
+    """Mark a test as requiring llvm to run.
+
+    Parameters
+    ----------
+    f : function
+        Function to mark
+    """
+    _requires_llvm = [
+        pytest.mark.llvm,
+        pytest.mark.skipif(
+            not device_enabled("llvm"), reason="LLVM support not enabled"
+        ),
+    ]
+    return _compose(args, _requires_llvm)
+
+
+def _target_to_requirement(target):
+    # mapping from target to decorator
+    if target.startswith("cuda"):
+        return requires_cuda()
+    if target.startswith("rocm"):
+        return requires_rocm()
+    if target.startswith("vulkan"):
+        return requires_vulkan()
+    if target.startswith("nvptx"):
+        return [*requires_llvm(), *requires_gpu()]
+    if target.startswith("metal"):
+        return requires_metal()
+    if target.startswith("opencl"):
+        return requires_opencl()
+    if target.startswith("llvm"):
+        return requires_llvm()
+    return []
+
+
+def parametrize_targets(*args):
+    """Parametrize a test over all enabled targets.
+
+    Use this decorator when you want your test to be run over a variety of
+    targets and devices (including cpu and gpu devices).
+
+    Parameters
+    ----------
+    f : function
+        Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, ctx):`,
+        where `xxxxxxxxx` is any name.
+    targets : list[str], optional
+        Set of targets to run against. If not supplied,
+        :py:func:`tvm.testing.enabled_targets` will be used.
+
+    Example
+    -------
+    >>> @tvm.testing.parametrize_targets
+    >>> def test_mytest(target, ctx):
+    >>>     ...  # do something
+
+    Or
+
+    >>> @tvm.testing.parametrize_targets("llvm", "cuda")
+    >>> def test_mytest(target, ctx):
+    >>>     ...  # do something
+    """
+    def wrap(targets):
+        def func(f):
+            params = [
+                pytest.param(target, tvm.context(target, 0), marks=_target_to_requirement(target))
+                for target in targets
+            ]
+            return pytest.mark.parametrize("target,ctx", params)(f)
+        return func
+    if len(args) == 1 and callable(args[0]):
+        targets = [t for t, _ in enabled_targets()]
+        return wrap(targets)(args[0])
+    return wrap(args)
+
+
 tvm._ffi._init_api("testing", __name__)
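
As a quick illustration of how TVM_TEST_TARGETS drives the helpers above (an
editor's sketch, assuming a local TVM build with at least LLVM enabled):

    import os

    # Semicolon-separated list, mirroring DEFAULT_TEST_TARGETS; targets not
    # supported by this build are filtered out by _get_targets().
    os.environ["TVM_TEST_TARGETS"] = "llvm;cuda"

    import tvm.testing

    # Only targets that are both requested above and enabled in this build are
    # returned, each paired with its context.
    for target, ctx in tvm.testing.enabled_targets():
        print(target, ctx)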
index f803647..9c0a607 100644 (file)
@@ -122,6 +122,8 @@ ALLOW_SPECIFIC_FILE = {
     "docs/_static/css/tvm_theme.css",
     "docs/_static/img/tvm-logo-small.png",
     "docs/_static/img/tvm-logo-square.png",
+    # pytest config
+    "pytest.ini",
 }
 
 
index e1c1c71..7247ab7 100644 (file)
@@ -22,6 +22,7 @@ import tvm.topi.testing
 from tvm.contrib import cblas
 from tvm.contrib import mkl
 from tvm.contrib import mkldnn
+import tvm.testing
 
 def verify_matmul_add(m, l, n, lib, transa=False, transb=False, dtype="float32"):
     bias = te.var('bias', dtype=dtype)
@@ -41,7 +42,7 @@ def verify_matmul_add(m, l, n, lib, transa=False, transb=False, dtype="float32")
         return np.dot(a, b) + bb
 
     def verify(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             print("skip because %s is not enabled..." % target)
             return
         if not tvm.get_global_func(lib.__name__ + ".matmul", True):
@@ -107,7 +108,7 @@ def verify_quantized_matmul_add(m, l, n, transa=False, transb=False):
         return np.dot(a, b) + bb
 
     def verify(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             print("skip because %s is not enabled..." % target)
             return
         if not tvm.get_global_func("tvm.contrib.mkl.matmul_u8s8s32", True):
@@ -153,7 +154,7 @@ def verify_batch_matmul(batch, m, l, n, lib, transa=False, transb=False, iterati
         return tvm.topi.testing.batch_matmul(a, b)
 
     def verify(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             print("skip because %s is not enabled..." % target)
             return
         if not tvm.get_global_func(lib.__name__ + ".matmul", True):
index 517e6e1..f387f35 100644 (file)
@@ -19,6 +19,7 @@ from tvm import te
 import numpy as np
 from tvm.contrib import cublas
 from tvm.contrib import cublaslt
+import tvm.testing
 
 def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5):
     n = 1024
@@ -30,9 +31,6 @@ def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5):
     s = te.create_schedule(C.op)
 
     def verify(target="cuda"):
-        if not tvm.runtime.enabled(target):
-            print("skip because %s is not enabled..." % target)
-            return
         if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
             print("skip because extern function is not available")
             return
@@ -64,9 +62,6 @@ def verify_matmul_add_igemm(in_dtype, out_dtype, rtol=1e-5):
     s = te.create_schedule(C.op)
 
     def verify(target="cuda"):
-        if not tvm.runtime.enabled(target):
-            print("skip because %s is not enabled..." % target)
-            return
         if not tvm.get_global_func("tvm.contrib.cublaslt.matmul", True):
             print("skip because extern function is not available")
             return
@@ -115,9 +110,6 @@ def verify_batch_matmul(in_dtype, out_dtype, rtol=1e-5):
     s = te.create_schedule(C.op)
 
     def verify(target="cuda"):
-        if not tvm.runtime.enabled(target):
-            print("skip because %s is not enabled..." % target)
-            return
         if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
             print("skip because extern function is not available")
             return
@@ -132,15 +124,18 @@ def verify_batch_matmul(in_dtype, out_dtype, rtol=1e-5):
                                    b.asnumpy().astype(C.dtype)).astype(C.dtype), rtol=rtol)
     verify()
 
+@tvm.testing.requires_cuda
 def test_matmul_add():
     verify_matmul_add('float', 'float', rtol=1e-3)
     verify_matmul_add('float16', 'float')
     verify_matmul_add('float16', 'float16', rtol=1e-2)
     verify_matmul_add('int8', 'int32')
 
+@tvm.testing.requires_cuda
 def test_matmul_add_igemm():
     verify_matmul_add_igemm('int8', 'int32')
 
+@tvm.testing.requires_cuda
 def test_batch_matmul():
     verify_batch_matmul('float', 'float')
     verify_batch_matmul('float16', 'float')
index 61822c8..5777c3b 100644 (file)
@@ -20,6 +20,7 @@ from tvm.contrib import cudnn
 from tvm.contrib.nvcc import have_fp16
 import numpy as np
 import tvm.topi.testing
+import tvm.testing
 
 def verify_conv2d(data_dtype, conv_dtype, tensor_format=0, groups=1):
     in_channel = 4
@@ -36,9 +37,6 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0, groups=1):
     height = 32
     width = 32
 
-    if not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled...")
-        return
     if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True):
         print("skip because cudnn is not enabled...")
         return
@@ -87,6 +85,7 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0, groups=1):
     f(x, w, y)
     tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=1e-2, rtol=1e-2)
 
+@tvm.testing.requires_gpu
 def test_conv2d():
     verify_conv2d("float32", "float32", tensor_format=0)
     verify_conv2d("float16", "float32", tensor_format=1)
@@ -118,9 +117,6 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0, groups=1):
     height = 32
     width = 32
 
-    if not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled...")
-        return
     if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True):
         print("skip because cudnn is not enabled...")
         return
@@ -161,6 +157,7 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0, groups=1):
     f(x, w, y)
     tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=3e-5, rtol=1e-4)
 
+@tvm.testing.requires_gpu
 def test_conv3d():
     verify_conv3d("float32", "float32", tensor_format=0)
     verify_conv3d("float32", "float32", tensor_format=0, groups=2)
@@ -195,10 +192,8 @@ def verify_softmax_4d(shape, dtype="float32"):
     f(a, b)
     tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3)
 
+@tvm.testing.requires_gpu
 def test_softmax():
-    if not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled...")
-        return
     if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True):
         print("skip because cudnn is not enabled...")
         return
index 37101a8..5380040 100644 (file)
@@ -17,6 +17,7 @@
 # pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition
 
 import tvm
+import tvm.testing
 from tvm import te
 import numpy as np
 from tvm.topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32_cascadelake
@@ -24,6 +25,7 @@ from tvm.topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32
 import pytest
 
 
+@tvm.testing.requires_llvm
 @pytest.mark.skip("skip because feature not enabled")
 def test_fc_int8_acc32():
     m = 1024
@@ -42,7 +44,7 @@ def test_fc_int8_acc32():
     # (ignoring processor)" error with the following setting. After LLVM 8.0 is enabled in the
     # test, we should use cascadelake setting.
     def verify(target="llvm -mcpu=cascadelake"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             print("skip because %s is not enabled..." % target)
             return
 
index deffbe9..e8d348e 100644 (file)
@@ -20,6 +20,7 @@ from tvm.contrib import miopen
 import numpy as np
 
 
+@tvm.testing.requires_rocm
 def test_conv2d():
     in_channel = 3
     out_channel = 64
@@ -33,9 +34,6 @@ def test_conv2d():
     dilation_w = 1
 
     xshape = [1, in_channel, 128, 128]
-    if not tvm.runtime.enabled("rocm"):
-        print("skip because rocm is not enabled...")
-        return
     if not tvm.get_global_func("tvm.contrib.miopen.conv2d.setup", True):
         print("skip because miopen is not enabled...")
         return
index b524365..1f0906e 100644 (file)
@@ -19,10 +19,8 @@ from tvm import te
 import numpy as np
 from tvm.contrib import mps
 
+@tvm.testing.requires_metal
 def test_matmul():
-    if not tvm.runtime.enabled("metal"):
-        print("skip because %s is not enabled..." % "metal")
-        return
     n = 1024
     l = 128
     m = 256
@@ -62,10 +60,8 @@ def test_matmul():
             c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
     verify(A, B, D, s)
 
+@tvm.testing.requires_metal
 def test_conv2d():
-    if not tvm.runtime.enabled("metal"):
-        print("skip because %s is not enabled..." % "metal")
-        return
     n = 1
     h = 14
     w = 14
index 81fcb12..bbee2b6 100644 (file)
@@ -23,6 +23,7 @@ from tvm.contrib import nnpack
 import pytest
 
 
+@tvm.testing.requires_llvm
 def test_fully_connected_inference():
     n = 1024
     l = 128
@@ -35,8 +36,6 @@ def test_fully_connected_inference():
     s = te.create_schedule(D.op)
 
     def verify(target="llvm"):
-        if not tvm.runtime.enabled(target):
-            pytest.skip("%s is not enabled..." % target)
         if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
             pytest.skip("extern function is not available")
         if not nnpack.is_available():
@@ -82,6 +81,7 @@ def np_conv(na, nw, padding, stride=1):
                 nb[n, f] += out[::stride, ::stride]
     return nb
 
+@tvm.testing.requires_llvm
 def test_convolution_inference():
     BATCH = 8
     IH = 48
@@ -105,8 +105,6 @@ def test_convolution_inference():
     def verify(target="llvm",
                algorithm=nnpack.ConvolutionAlgorithm.AUTO,
                with_bias=True):
-        if not tvm.runtime.enabled(target):
-            pytest.skip("%s is not enabled..." % target)
         if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
             pytest.skip("extern function is not available")
         if not nnpack.is_available():
@@ -144,6 +142,7 @@ def test_convolution_inference():
             verify(algorithm=algorithm, with_bias=with_bias)
 
 
+@tvm.testing.requires_llvm
 def test_convolution_inference_without_weight_transform():
     BATCH = 6
     IH = 48
@@ -167,8 +166,6 @@ def test_convolution_inference_without_weight_transform():
     def verify(target="llvm",
                algorithm=nnpack.ConvolutionAlgorithm.AUTO,
                with_bias=True):
-        if not tvm.runtime.enabled(target):
-            pytest.skip("%s is not enabled..." % target)
         if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
             pytest.skip("extern function is not available")
         if not nnpack.is_available():
index e61030b..c3601c7 100644 (file)
@@ -19,21 +19,7 @@ from tvm import te
 import numpy as np
 from tvm.contrib import random
 from tvm import rpc
-
-def enabled_ctx_list():
-    ctx_list = [('cpu', tvm.cpu(0)),
-                ('gpu', tvm.gpu(0)),
-                ('cl', tvm.opencl(0)),
-                ('metal', tvm.metal(0)),
-                ('rocm', tvm.rocm(0)),
-                ('vulkan', tvm.vulkan(0)),
-                ('vpi', tvm.vpi(0))]
-    for k, v  in ctx_list:
-        assert tvm.context(k, 0) == v
-    ctx_list = [x[1] for x in ctx_list if x[1].exist]
-    return ctx_list
-
-ENABLED_CTX_LIST = enabled_ctx_list()
+import tvm.testing
 
 def test_randint():
     m = 10240
@@ -42,7 +28,7 @@ def test_randint():
     s = te.create_schedule(A.op)
 
     def verify(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             print("skip because %s is not enabled..." % target)
             return
         if not tvm.get_global_func("tvm.contrib.random.randint", True):
@@ -66,7 +52,7 @@ def test_uniform():
     s = te.create_schedule(A.op)
 
     def verify(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             print("skip because %s is not enabled..." % target)
             return
         if not tvm.get_global_func("tvm.contrib.random.uniform", True):
@@ -90,7 +76,7 @@ def test_normal():
     s = te.create_schedule(A.op)
 
     def verify(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             print("skip because %s is not enabled..." % target)
             return
         if not tvm.get_global_func("tvm.contrib.random.normal", True):
@@ -105,6 +91,7 @@ def test_normal():
         assert abs(np.std(na) - 4) < 1e-2
     verify()
 
+@tvm.testing.uses_gpu
 def test_random_fill():
     def test_local(ctx, dtype):
         if not tvm.get_global_func("tvm.contrib.random.random_fill", True):
@@ -125,7 +112,7 @@ def test_random_fill():
         if not tvm.get_global_func("tvm.contrib.random.random_fill", True):
             print("skip because extern function is not available")
             return
-        if not tvm.runtime.enabled("rpc") or not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("rpc") or not tvm.runtime.enabled("llvm"):
             return
         np_ones = np.ones((512, 512), dtype=dtype)
         server = rpc.Server("localhost")
@@ -142,7 +129,7 @@ def test_random_fill():
 
     for dtype in ["bool", "int8", "uint8", "int16", "uint16", "int32", "int32",
                   "int64", "uint64", "float16", "float32", "float64"]:
-        for ctx in ENABLED_CTX_LIST:
+        for _, ctx in tvm.testing.enabled_targets():
             test_local(ctx, dtype)
         test_rpc(dtype)
 
@@ -151,3 +138,4 @@ if __name__ == "__main__":
     test_uniform()
     test_normal()
     test_random_fill()
+
index af9d6dd..f5ec5be 100644 (file)
@@ -19,6 +19,7 @@ from tvm import te
 import numpy as np
 from tvm.contrib import rocblas
 
+@tvm.testing.requires_rocm
 def test_matmul_add():
     n = 1024
     l = 128
@@ -29,9 +30,6 @@ def test_matmul_add():
     s = te.create_schedule(C.op)
 
     def verify(target="rocm"):
-        if not tvm.runtime.enabled(target):
-            print("skip because %s is not enabled..." % target)
-            return
         if not tvm.get_global_func("tvm.contrib.rocblas.matmul", True):
             print("skip because extern function is not available")
             return
index 50a8781..84d03d9 100644 (file)
@@ -18,12 +18,12 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
 from tvm import relay
 from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19
 from caffe2.python import workspace, core
 from caffe2.proto import caffe2_pb2
 from collections import namedtuple
+import tvm.testing
 
 
 def get_tvm_output(model,
@@ -84,19 +84,22 @@ def verify_caffe2_forward_impl(model, data_shape, out_shape):
     dtype = 'float32'
     data = np.random.uniform(size=data_shape).astype(dtype)
     c2_out = get_caffe2_output(model, data, dtype)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, data, target, ctx, out_shape, dtype)
         tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_squeezenet1_1():
     verify_caffe2_forward_impl(c2_squeezenet, (1, 3, 224, 224), (1, 1000, 1, 1))
 
 
+@tvm.testing.uses_gpu
 def test_forward_resnet50():
     verify_caffe2_forward_impl(c2_resnet50, (1, 3, 224, 224), (1, 1000))
 
 
+@tvm.testing.uses_gpu
 def test_forward_vgg19():
     verify_caffe2_forward_impl(c2_vgg19, (1, 3, 224, 224), (1, 1000))
 
@@ -104,6 +107,7 @@ def test_forward_vgg19():
 Model = namedtuple('Model', ['init_net', 'predict_net'])
 
 
+@tvm.testing.uses_gpu
 def test_elementwise_add():
     data_shape = (1, 16, 9, 9)
     init_net = caffe2_pb2.NetDef()
@@ -142,6 +146,7 @@ def test_elementwise_add():
     verify_caffe2_forward_impl(model, data_shape, data_shape)
 
 
+@tvm.testing.uses_gpu
 def test_elementwise_add_with_broadcast():
     data_shape = (1, 16, 9, 9)
     init_net = caffe2_pb2.NetDef()
@@ -181,6 +186,7 @@ def test_elementwise_add_with_broadcast():
     verify_caffe2_forward_impl(model, data_shape, data_shape)
 
 
+@tvm.testing.uses_gpu
 def test_normalize_yuv():
     data_shape = (1, 3, 96, 96)
     init_net = caffe2_pb2.NetDef()
index 5ae7a6c..d3a31fe 100644 (file)
@@ -25,11 +25,11 @@ from tvm.contrib import graph_runtime
 from tvm import topi
 import tvm.topi.testing
 from tvm import relay
-from tvm.relay.testing.config import ctx_list
 from tvm.topi.testing import conv2d_nchw_python
 
 import coremltools as cm
 import model_zoo
+import tvm.testing
 
 def get_tvm_output(func, x, params, target, ctx,
                    out_shape=(1, 1000), input_name='image', dtype='float32'):
@@ -50,15 +50,17 @@ def run_model_checkonly(model_file, model_name='', input_name='image'):
     shape_dict = {input_name : x.shape}
     # Some Relay passes change operators on the fly. Ensuring that we generate
     # new graph for each target.
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         mod, params = relay.frontend.from_coreml(model, shape_dict)
         tvm_output = get_tvm_output(mod["main"], x, params, target, ctx)
         print(target, ctx, model_name, 'prediction id: ', np.argmax(tvm_output.flat))
 
+@tvm.testing.uses_gpu
 def test_mobilenet_checkonly():
     model_file = model_zoo.get_mobilenet()
     run_model_checkonly(model_file, 'mobilenet')
 
+@tvm.testing.uses_gpu
 def test_resnet50_checkonly():
     model_file = model_zoo.get_resnet50()
     run_model_checkonly(model_file, 'resnet50')
@@ -122,10 +124,11 @@ def verify_AddLayerParams(input_dim, alpha=2):
                             output_name='output',
                             mode='ADD')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_AddLayerParams():
     verify_AddLayerParams((1, 2, 2), 0)
     verify_AddLayerParams((1, 2, 2), 1)
@@ -148,10 +151,11 @@ def verify_MultiplyLayerParams(input_dim, alpha):
                             output_name='output',
                             mode='MULTIPLY')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_MultiplyLayerParams():
     verify_MultiplyLayerParams((1, 2, 2), 0)
     verify_MultiplyLayerParams((1, 2, 2), 1)
@@ -173,10 +177,11 @@ def verify_ConcatLayerParams(input1_dim, input2_dim):
                             output_name='output',
                             mode='CONCAT')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_ConcatLayerParams():
     verify_ConcatLayerParams((1, 1, 2, 2), (1, 2, 2, 2))
     verify_ConcatLayerParams((1, 2, 4, 4), (1, 3, 4, 4))
@@ -203,10 +208,11 @@ def verify_UpsampleLayerParams(input_dim, scale, mode):
                          output_name='output')
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, a_np, 'input', b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_UpsampleLayerParams():
     verify_UpsampleLayerParams((1, 16, 32, 32), 2, 'NN')
     verify_UpsampleLayerParams((1, 4, 6, 6), 3, 'BILINEAR')
@@ -223,10 +229,11 @@ def verify_l2_normalize(input_dim, eps):
     builder.add_l2_normalize(name='L2', epsilon=eps, input_name='input', output_name='output')
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, a_np, 'input', b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_l2_normalize():
     verify_l2_normalize((1, 3, 20, 20), 0.001)
 
@@ -248,10 +255,11 @@ def verify_lrn(input_dim, size, bias, alpha, beta):
                     local_size=size)
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, a_np, 'input', b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_lrn():
     verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5)
 
@@ -272,10 +280,11 @@ def verify_average(input_dim1, input_dim2, axis=0):
                             output_name='output',
                             mode='AVE')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_average():
     verify_average((1, 3, 20, 20), (1, 3, 20, 20))
     verify_average((3, 20, 20), (1, 3, 20, 20))
@@ -300,11 +309,12 @@ def verify_max(input_dim):
                             output_name='output',
                             mode='MAX')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np1, a_np2, a_np3],
                             ['input1', 'input2', 'input3'], b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_max():
     verify_max((1, 3, 20, 20))
     verify_max((20, 20))
@@ -328,11 +338,12 @@ def verify_min(input_dim):
                             output_name='output',
                             mode='MIN')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np1, a_np2, a_np3],
                             ['input1', 'input2', 'input3'], b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_min():
     verify_min((1, 3, 20, 20))
     verify_min((20, 20))
@@ -353,7 +364,7 @@ def verify_unary_sqrt(input_dim):
                       mode='sqrt')
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -375,7 +386,7 @@ def verify_unary_rsqrt(input_dim, epsilon=0):
                       epsilon=epsilon)
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -397,7 +408,7 @@ def verify_unary_inverse(input_dim, epsilon=0):
                       epsilon=epsilon)
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -419,7 +430,7 @@ def verify_unary_power(input_dim, alpha):
                       alpha=alpha)
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -440,7 +451,7 @@ def verify_unary_exp(input_dim):
                       mode='exp')
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -461,7 +472,7 @@ def verify_unary_log(input_dim):
                       mode='log')
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -482,7 +493,7 @@ def verify_unary_abs(input_dim):
                       mode='abs')
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -504,12 +515,13 @@ def verify_unary_threshold(input_dim, alpha):
                       alpha=alpha)
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_unary():
     verify_unary_sqrt((1, 3, 20, 20))
     verify_unary_rsqrt((1, 3, 20, 20))
@@ -525,6 +537,7 @@ def test_forward_unary():
     verify_unary_threshold((1, 3, 20, 20), alpha=5.0)
 
 
+@tvm.testing.uses_gpu
 def test_forward_reduce():
     from enum import Enum
     class ReduceAxis(Enum):
@@ -565,7 +578,7 @@ def test_forward_reduce():
                           mode=mode)
 
         model = cm.models.MLModel(builder.spec)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             out = run_tvm_graph(model, target, ctx, [a_np],
                                 ['input'], ref_val.shape, dtype)
             tvm.testing.assert_allclose(out, ref_val, rtol=1e-5, atol=1e-5)
@@ -602,7 +615,7 @@ def verify_reshape(input_dim, target_shape, mode):
                        mode=mode)
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], ref_val.shape, dtype)
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -637,7 +650,7 @@ def verify_split(input_dim, nOutputs):
                       output_names=output_names)
 
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input'], output_shapes, [dtype] * len(output_shapes))
         tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
@@ -673,11 +686,12 @@ def verify_image_scaler(input_dim, blue_bias=0.0, green_bias=0.0, red_bias=0.0,
     builder.add_elementwise(name='add', input_names=['input1', 'input2'],
                             output_name='output', alpha=0, mode='ADD')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np, a_np],
                             ['input1', 'input2'], b_np.shape, dtype)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_image_scaler():
     verify_image_scaler((3, 224, 224), image_scale=0.17)
     verify_image_scaler((3, 224, 224),
@@ -705,11 +719,12 @@ def verify_convolution(input_dim, filter, padding):
                             input_name='input1',
                             output_name='output')
     model = cm.models.MLModel(builder.spec)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         out = run_tvm_graph(model, target, ctx, [a_np],
                             ['input1'], output_shape=None)
         tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_convolution():
     verify_convolution((1, 3, 224, 224), filter=(32, 3, 3, 3), padding='VALID')
     verify_convolution((1, 3, 224, 224), filter=(32, 3, 3, 3), padding='SAME')
index f940255..9482230 100644 (file)
@@ -19,8 +19,8 @@ import tvm
 from tvm import te
 from tvm import relay
 from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
 import keras
+import tvm.testing
 
 try:
     import tensorflow.compat.v1 as tf
@@ -104,7 +104,7 @@ def verify_keras_frontend(keras_model, need_transpose=True, layout='NCHW'):
     xs = [np.random.uniform(size=shape, low=-1.0, high=1.0) for shape in in_shapes]
     keras_out = get_keras_output(xs)
     keras_out = keras_out if isinstance(keras_out, list) else [keras_out]
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         inputs = [to_channels_first(x) for x in xs] if need_transpose else xs
         tvm_out = get_tvm_output(inputs, target, ctx)
         for kout, tout in zip(keras_out, tvm_out):
@@ -113,6 +113,7 @@ def verify_keras_frontend(keras_model, need_transpose=True, layout='NCHW'):
             tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 class TestKeras:
     scenarios = [using_classic_keras, using_tensorflow_keras]
 
index 594ffe7..bc5cbeb 100644 (file)
@@ -20,15 +20,16 @@ import operator
 import tvm
 from tvm import te
 from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
 from tvm import relay
 import mxnet as mx
 
 from mxnet import gluon
 from mxnet.gluon.model_zoo import vision
-import model_zoo
 import random
 import pytest
+import model_zoo
+
+import tvm.testing
 
 def verify_mxnet_frontend_impl(mx_symbol,
                                data_shape=(1, 3, 224, 224),
@@ -82,32 +83,36 @@ def verify_mxnet_frontend_impl(mx_symbol,
     x = np.random.uniform(size=data_shape)
     if gluon_impl:
         gluon_out, gluon_sym = get_gluon_output(name, x)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             tvm_out = get_tvm_output(gluon_sym, x, None, None, target, ctx, dtype)
             tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5)
     else:
         mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype)
         assert "data" not in args
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, ctx, dtype)
             tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_forward_mlp():
     mlp = model_zoo.mx_mlp()
     verify_mxnet_frontend_impl(mlp,
                                data_shape=(1, 1, 28, 28),
                                out_shape=(1, 10))
 
+@tvm.testing.uses_gpu
 def test_forward_vgg():
     for n in [11]:
         mx_sym = model_zoo.mx_vgg(n)
         verify_mxnet_frontend_impl(mx_sym)
 
+@tvm.testing.uses_gpu
 def test_forward_resnet():
     for n in [18]:
         mx_sym = model_zoo.mx_resnet(18)
         verify_mxnet_frontend_impl(mx_sym)
 
+@tvm.testing.uses_gpu
 def test_forward_leaky_relu():
     data = mx.sym.var('data')
     data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
@@ -116,36 +121,42 @@ def test_forward_leaky_relu():
     mx_sym = mx.sym.LeakyReLU(data, act_type='leaky')
     verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
 
+@tvm.testing.uses_gpu
 def test_forward_elu():
     data = mx.sym.var('data')
     data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
     mx_sym = mx.sym.LeakyReLU(data, act_type='elu')
     verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
 
+@tvm.testing.uses_gpu
 def test_forward_rrelu():
     data = mx.sym.var('data')
     data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
     mx_sym = mx.sym.LeakyReLU(data, act_type='rrelu', lower_bound=0.3, upper_bound=0.7)
     verify_mxnet_frontend_impl(mx_sym[0], (1, 3, 100, 100), (1, 6, 100, 100))
 
+@tvm.testing.uses_gpu
 def test_forward_prelu():
     data = mx.sym.var('data')
     data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
     mx_sym = mx.sym.LeakyReLU(data, act_type='prelu')
     verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
 
+@tvm.testing.uses_gpu
 def test_forward_gelu():
     data = mx.sym.var('data')
     data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
     mx_sym = mx.sym.LeakyReLU(data, act_type='gelu')
     verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
 
+@tvm.testing.uses_gpu
 def test_forward_softrelu():
     data = mx.sym.var('data')
     data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
     mx_sym = mx.sym.Activation(data, act_type='softrelu')
     verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
 
+@tvm.testing.uses_gpu
 def test_forward_fc_flatten():
     # test flatten=True option in mxnet 0.11.1
     data = mx.sym.var('data')
@@ -157,27 +168,32 @@ def test_forward_fc_flatten():
     except:
         pass
 
+@tvm.testing.uses_gpu
 def test_forward_clip():
     data = mx.sym.var('data')
     data = mx.sym.concat(data, -data, dim=1)  # negative part explicitly
     mx_sym = mx.sym.clip(data, a_min=0, a_max=1)
     verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
 
+@tvm.testing.uses_gpu
 def test_forward_split():
     data = mx.sym.var('data')
     mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=False)
     verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 1, 2, 1))
 
+@tvm.testing.uses_gpu
 def test_forward_split_squeeze():
     data = mx.sym.var('data')
     mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=True)
     verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 2, 1))
 
+@tvm.testing.uses_gpu
 def test_forward_expand_dims():
     data = mx.sym.var('data')
     mx_sym = mx.sym.expand_dims(data, axis=1)
     verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 1, 3, 4))
 
+@tvm.testing.uses_gpu
 def test_forward_pooling():
     data = mx.sym.var('data')
     mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='avg')
@@ -186,6 +202,7 @@ def test_forward_pooling():
     mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='max')
     verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
 
+@tvm.testing.uses_gpu
 def test_forward_pooling3d():
     data = mx.sym.var('data')
     mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type='avg')
@@ -194,6 +211,7 @@ def test_forward_pooling3d():
     mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type='max')
     verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8, 8), (1, 20, 8, 8, 8))
 
+@tvm.testing.uses_gpu
 def test_forward_adaptive_pooling():
     data = mx.sym.var('data')
     mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(1,))
@@ -202,49 +220,58 @@ def test_forward_adaptive_pooling():
     mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(3, 3))
     verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 3, 3))
 
+@tvm.testing.uses_gpu
 def test_forward_lrn():
     data = mx.sym.var('data')
     mx_sym = mx.sym.LRN(data, alpha=2, beta=2, knorm=1, nsize=5)
     verify_mxnet_frontend_impl(mx_sym, (1, 10, 24, 24), (1, 10, 24, 24))
 
+@tvm.testing.uses_gpu
 def test_forward_ones():
     data = mx.sym.var('data')
     ones = mx.sym.ones(shape=(2, 3, 4), dtype='float32')
     mx_sym = mx.sym.elemwise_add(data, ones)
     verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
 
+@tvm.testing.uses_gpu
 def test_forward_zeros():
     data = mx.sym.var('data')
     zeros = mx.sym.zeros(shape=(2, 3, 4), dtype='float32')
     mx_sym = mx.sym.elemwise_add(data, zeros)
     verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
 
+@tvm.testing.uses_gpu
 def test_forward_ones_like():
     data = mx.sym.var('data')
     mx_sym = mx.sym.ones_like(data, dtype='float32')
     verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
 
+@tvm.testing.uses_gpu
 def test_forward_make_loss():
     data = mx.sym.var('data')
     ones = mx.sym.ones(shape=(2, 3, 4), dtype='float32')
     mx_sym = mx.sym.make_loss((data-ones)**2/2, dtype='float32')
     verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
 
+@tvm.testing.uses_gpu
 def test_forward_zeros_like():
     data = mx.sym.var('data')
     mx_sym = mx.sym.zeros_like(data, dtype='float32')
     verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
 
+@tvm.testing.uses_gpu
 def test_forward_argmax():
     data = mx.sym.var('data')
     mx_sym = mx.sym.argmax(data, axis=1)
     verify_mxnet_frontend_impl(mx_sym, (5, 3), (5,))
 
+@tvm.testing.uses_gpu
 def test_forward_argmin():
     data = mx.sym.var('data')
     mx_sym = mx.sym.argmin(data, axis=0)
     verify_mxnet_frontend_impl(mx_sym, (5, 4), (4,))
 
+@tvm.testing.uses_gpu
 def test_forward_slice():
     data = mx.sym.var('data')
     mx_sym = mx.sym.slice(data, begin=(0, 1), end=(2, 4))
@@ -252,6 +279,7 @@ def test_forward_slice():
     mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2))
     verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2))
 
+@tvm.testing.uses_gpu
 def test_forward_where():
     cond = mx.sym.var('cond')
     x = mx.sym.var('x')
@@ -273,13 +301,14 @@ def test_forward_where():
     mx_out = mx.nd.where(mx_cond, mx_x, mx_y).asnumpy()
 
     mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, args, auxs)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
             op_res = intrp.evaluate()(np_cond, np_x, np_y)
             tvm.testing.assert_allclose(op_res.asnumpy(), mx_out)
 
 
+@tvm.testing.uses_gpu
 def test_forward_arange():
     def _mx_symbol(F, start, stop, step):
         if start is None and step is None:
@@ -296,7 +325,7 @@ def test_forward_arange():
         ref_res = _mx_symbol(mx.nd, start, stop, step).asnumpy()
         mx_sym = _mx_symbol(mx.sym, start, stop, step)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()()
@@ -315,6 +344,7 @@ def _mx_symbol(F, op_name, inputs):
     op = getattr(F, op_name)
     return op(*inputs)
 
+@tvm.testing.uses_gpu
 def test_forward_broadcast_ops():
     for op in ["broadcast_add",
                "broadcast_plus",
@@ -349,12 +379,13 @@ def test_forward_broadcast_ops():
         ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), mx.nd.array(b_np)])
         shapes = {'a': a_shape, 'b': b_shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(a_np, b_np)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
 
+@tvm.testing.uses_gpu
 def test_forward_elemwise_ops():
     for op in ["elemwise_add", "elemwise_sub", "elemwise_mul",
                "elemwise_div", "maximum", "minimum",
@@ -372,13 +403,14 @@ def test_forward_elemwise_ops():
             ref_res = op(mx.nd.array(a_np), mx.nd.array(b_np))
         shapes = {'a': shape, 'b': shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(a_np, b_np)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
 
 
+@tvm.testing.uses_gpu
 def test_forward_softmin():
     data = mx.sym.var('data')
     mx_sym = mx.sym.softmin(data)
@@ -388,6 +420,7 @@ def test_forward_softmin():
     verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 3, 100, 100))
 
 
+@tvm.testing.uses_gpu
 def test_forward_unary_ops():
     for op in ["abs", "sqrt", "ceil", "floor", "round", "reciprocal", "trunc",
                "softsign", "hard_sigmoid",
@@ -402,13 +435,14 @@ def test_forward_unary_ops():
         ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np)])
         shapes = {'a': shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(a_np)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_scalar_ops():
     for op in [operator.add, operator.sub, operator.mul, operator.truediv,
                operator.pow, operator.lt, operator.le, operator.eq,
@@ -421,7 +455,7 @@ def test_forward_scalar_ops():
         ref_res = op(mx.nd.array(a_np), b_scalar)
         shapes = {'a': a_shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(a_np)
@@ -435,19 +469,20 @@ def test_forward_scalar_ops():
         ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), b_scalar])
         shapes = {'a': a_shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(a_np)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
 
+@tvm.testing.uses_gpu
 def test_forward_slice_axis():
     def verify(shape, axis, begin, end):
         data_np = np.random.uniform(size=shape).astype("float32")
         ref_res = mx.nd.slice_axis(mx.nd.array(data_np), axis, begin, end)
         mx_sym = mx.sym.slice_axis(mx.sym.var("data"), axis, begin, end)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(data_np)
@@ -458,6 +493,7 @@ def test_forward_slice_axis():
     verify((3, 4), 1, -3, -1)
     verify((3, 4), -1, -3, -1)
 
+@tvm.testing.uses_gpu
 def test_forward_slice_like():
     def verify(x_shape, y_shape, axes):
         x_np = np.random.uniform(size=x_shape).astype("float32")
@@ -469,7 +505,7 @@ def test_forward_slice_like():
             ref_res = mx.nd.slice_like(mx.nd.array(x_np), mx.nd.array(y_np), axes=axes)
             mx_sym = mx.sym.slice_like(mx.sym.var("x"), mx.sym.var("y"), axes=axes)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": x_shape, "y": y_shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np, y_np)
@@ -479,6 +515,7 @@ def test_forward_slice_like():
     verify((3, 4), (2, 3), (0))
     verify((3, 4), (2, 3), (-1))
 
+@tvm.testing.uses_gpu
 def test_forward_sequence_reverse():
     def verify(shape, seq_lengths, use_seq_lengths, seq_axis):
         data_np = np.random.uniform(size=shape).astype("float32")
@@ -500,7 +537,7 @@ def test_forward_sequence_reverse():
         mx_sym = mx.sym.SequenceReverse(*mx_sym_args)
         mod, _ = relay.frontend.from_mxnet(mx_sym, *from_mxnet_args)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(*in_data)
@@ -512,18 +549,20 @@ def test_forward_sequence_reverse():
     # MXNet accepts axis value as 0 only
     # verify((3, 4, 5, 6), None, False, 2)
 
+@tvm.testing.uses_gpu
 def test_forward_l2_normalize():
     data = mx.sym.var('data')
     mx_sym = mx.sym.L2Normalization(data, mode="channel")
     verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
 
+@tvm.testing.uses_gpu
 def test_forward_shape_array():
     def verify(shape):
         x_np = np.random.uniform(size=shape).astype("float32")
         ref_res = mx.nd.shape_array(mx.nd.array(x_np))
         mx_sym = mx.sym.shape_array(mx.sym.var("x"))
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np)
@@ -532,6 +571,7 @@ def test_forward_shape_array():
     verify((3, 4, 5))
     verify((3, 4, 5, 6))
 
+@tvm.testing.uses_gpu
 def test_forward_squeeze():
     def verify(shape, axis):
         x_np = np.random.uniform(size=shape).astype("float32")
@@ -542,7 +582,7 @@ def test_forward_squeeze():
             ref_res = mx.nd.squeeze(mx.nd.array(x_np), axis=axis)
             mx_sym = mx.sym.squeeze(mx.sym.var("x"), axis=axis)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np)
@@ -552,6 +592,7 @@ def test_forward_squeeze():
     verify((1, 3, 1), 2)
     verify((1, 3, 1), (0, 2))
 
+@tvm.testing.uses_gpu
 def test_forward_broadcast_axis():
     def verify(shape, axis, size):
         x_np = np.random.uniform(size=shape).astype("float32")
@@ -560,7 +601,7 @@ def test_forward_broadcast_axis():
             mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var('x'),axis,size])
             ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(x_np),axis,size])
             mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 for kind in ["graph", "debug"]:
                     intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                     op_res = intrp.evaluate()(x_np)
@@ -570,13 +611,14 @@ def test_forward_broadcast_axis():
     verify((1, 2, 1), (0, 2), (2, 3))
 
 
+@tvm.testing.uses_gpu
 def test_forward_broadcast_to():
     def verify(input_shape, shape):
         x_np = np.random.uniform(size=input_shape).astype("float32")
         ref_res = mx.nd.broadcast_to(mx.nd.array(x_np), shape=shape)
         mx_sym = mx.sym.broadcast_to(mx.sym.var("x"), shape=shape)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": input_shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np)
@@ -586,6 +628,7 @@ def test_forward_broadcast_to():
     verify((4, 1, 32, 32), (4, 8, 32, 32))
 
 
+@tvm.testing.uses_gpu
 def test_forward_logical_not():
     a_shape = (3, 4, 5)
     dtype = 'float32'
@@ -594,20 +637,21 @@ def test_forward_logical_not():
     ref_res = mx.nd.logical_not(mx.nd.array(a_np))
     shapes = {'a': a_shape}
     mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
             op_res = intrp.evaluate()(a_np)
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
 
 
+@tvm.testing.uses_gpu
 def test_forward_full():
     def verify(val, shape, dtype):
         ctx = mx.cpu()
         ref_res = mx.nd.full(shape, val, dtype=dtype)
         mx_sym = mx.sym.full(shape, val, dtype=dtype)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             # Skip testing graph runtime because this op will be optimized out
             # by constant folding.
             for kind in ["debug"]:
@@ -618,6 +662,7 @@ def test_forward_full():
     verify(2, (3, 4), "int32")
     verify(3.5, (1, 3, 4), "float32")
 
+@tvm.testing.uses_gpu
 def test_forward_embedding():
     def verify(data_shape, weight_shape):
         in_dim, out_dim = weight_shape
@@ -629,7 +674,7 @@ def test_forward_embedding():
                                   input_dim=in_dim, output_dim=out_dim)
         mod, _ = relay.frontend.from_mxnet(
             mx_sym, {"x": data_shape, "w": weight_shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x=x_np, w=w_np)
@@ -637,6 +682,7 @@ def test_forward_embedding():
     verify((2, 2), (4, 5))
     verify((2, 3, 4), (4, 5))
 
+@tvm.testing.uses_gpu
 def test_forward_smooth_l1():
     data = mx.sym.var('data')
     mx_sym = mx.sym.smooth_l1(data)
@@ -644,6 +690,7 @@ def test_forward_smooth_l1():
     mx_sym = mx.sym.smooth_l1(data, scalar=1.0)
     verify_mxnet_frontend_impl(mx_sym, (3, 4), (3, 4))
 
+@tvm.testing.uses_gpu
 def test_forward_take():
     def verify(shape, indices_src, axis, mode="clip"):
         x_np = np.random.uniform(size=shape).astype("float32")
@@ -651,7 +698,7 @@ def test_forward_take():
         ref_res = mx.nd.take(mx.nd.array(x_np), mx.nd.array(indices_np), axis, mode)
         mx_sym = mx.sym.take(mx.sym.var("x"), mx.sym.var("y"), axis, mode)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape, "y": indices_np.shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np, indices_np)
@@ -664,13 +711,14 @@ def test_forward_take():
     verify((3,4), [-1, 5], 1)
     verify((3,4), [-1, 5], 1, mode="wrap")
 
+@tvm.testing.uses_gpu
 def test_forward_gather_nd():
     def verify(xshape, yshape, y_data, error=False):
         x_data = np.random.uniform(size=xshape).astype("float32")
         ref_res = mx.nd.gather_nd(mx.nd.array(x_data), mx.nd.array(y_data))
         mx_sym = mx.sym.gather_nd(mx.sym.var("x_data"), mx.sym.var("y_data"))
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x_data": xshape, "y_data": yshape}, {"x_data": "float32", "y_data": "int32"})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_data, y_data)
@@ -682,12 +730,14 @@ def test_forward_gather_nd():
     verify((3, 2), (2, 2, 3), [[[0, 1, 2], [2, 0, 1]], [[0, 0, 0], [1, 1, 1]]])
     verify((1, 4), (1, 1), [[0]])
 
+@tvm.testing.uses_gpu
 def test_forward_bilinear_resize():
     # add tests including scale_height and scale_width when mxnet is updated to version 1.5
     data = mx.sym.var('data')
     mx_sym = mx.sym.contrib.BilinearResize2D(data, height=5, width=10)
     verify_mxnet_frontend_impl(mx_sym, (1, 2, 3, 4), (1, 2, 5, 10))
 
+@tvm.testing.uses_gpu
 def test_forward_grid_generator():
     def verify(shape, transform_type, target_shape):
         x = np.random.uniform(size=shape).astype("float32")
@@ -695,7 +745,7 @@ def test_forward_grid_generator():
         mx_sym = mx.sym.GridGenerator(mx.sym.var("x"), transform_type, target_shape)
         shape_dict = {"x": x.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(
                     kind, mod=mod, ctx=ctx, target=target)
@@ -706,6 +756,7 @@ def test_forward_grid_generator():
     verify((4, 2, 16, 16), 'warp', None)
     verify((1, 2, 16, 16), 'warp', None)
 
+@tvm.testing.uses_gpu
 def test_forward_bilinear_sampler():
     def verify(data_shape, grid_shape):
         data = np.random.uniform(size=data_shape).astype("float32")
@@ -714,7 +765,7 @@ def test_forward_bilinear_sampler():
         mx_sym = mx.sym.BilinearSampler(mx.sym.var("data"), mx.sym.var("grid"))
         shape_dict = {"data": data.shape, "grid": grid.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(
                     kind, mod=mod, ctx=ctx, target=target)
@@ -724,6 +775,7 @@ def test_forward_bilinear_sampler():
     verify((4, 4, 16, 32), (4, 2, 8, 8))
     verify((4, 4, 16, 32), (4, 2, 32, 32))
 
+@tvm.testing.uses_gpu
 def test_forward_rnn_layer():
     def verify(mode, seq_len, input_size, hidden_size, num_layers,
                batch=1, init_states=True, bidirectional=False):
@@ -768,7 +820,7 @@ def test_forward_rnn_layer():
 
         mod, params = relay.frontend.from_mxnet(
             mx_sym, shape=shape_dict, arg_params=mx_params)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             # only test graph runtime because debug runtime is too slow
             for kind in ["graph"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
@@ -792,6 +844,7 @@ def test_forward_rnn_layer():
         # verify(mode, 10, 64, 64, 3, init_states=False)
         # verify(mode, 10, 64, 64, 3, batch=2, bidirectional=True, init_states=False)
 
+@tvm.testing.uses_gpu
 def test_forward_Crop():
     def verify(xshape, yshape, offset=None):
         x_data = np.random.uniform(size=xshape).astype("float32")
@@ -803,7 +856,7 @@ def test_forward_Crop():
             mx_sym = mx.sym.Crop(mx.sym.var("x"), mx.sym.var("y"), offset=offset)
             ref_res = mx.nd.Crop(mx.nd.array(x_data), mx.nd.array(y_data), offset=offset)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": xshape, "y": yshape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 if offset is None or offset == (0, 0):
@@ -817,13 +870,14 @@ def test_forward_Crop():
     verify((5, 32, 40, 40), (5, 32, 25, 25))
     verify((5, 32, 40, 40), (5, 32, 25, 25), (5, 5))
 
+@tvm.testing.uses_gpu
 def test_forward_argsort():
     def verify(shape, axis, is_ascend, dtype="float32"):
         x_np = np.random.uniform(size=shape).astype("float32")
         ref_res = mx.nd.argsort(mx.nd.array(x_np), axis=axis, is_ascend=is_ascend, dtype=dtype)
         mx_sym = mx.sym.argsort(mx.sym.var("x"), axis=axis, is_ascend=is_ascend, dtype=dtype)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np)
@@ -832,6 +886,7 @@ def test_forward_argsort():
     verify((1, 4, 6), axis=1, is_ascend=True)
     verify((3, 5, 6), axis=-3, is_ascend=False, dtype="int32")
 
+@tvm.testing.uses_gpu
 def test_forward_topk():
     def verify(shape, k, axis, ret_type, is_ascend=False, dtype="float32"):
         x_np = np.random.uniform(size=shape).astype("float32")
@@ -840,7 +895,7 @@ def test_forward_topk():
         mx_sym = mx.sym.topk(mx.sym.var("x"), k=k, axis=axis, ret_typ=ret_type,
                              is_ascend=is_ascend, dtype=dtype)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np)
@@ -856,6 +911,7 @@ def test_forward_topk():
     verify((3, 5, 6), k=2, axis=1, ret_type="value", is_ascend=True)
     verify((3, 5, 6), k=0, axis=2, ret_type="both", dtype="int32")
 
+@tvm.testing.uses_gpu
 def test_forward_sequence_mask():
     def verify(shape, use_sequence_length, value, axis, dtype, itype):
         data_np = np.random.uniform(size=shape).astype(dtype)
@@ -885,7 +941,7 @@ def test_forward_sequence_mask():
                                          value=value,
                                          axis=axis)
             mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape}, dtype={"data": dtype})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ['graph', 'debug']:
                 if use_sequence_length is False and kind == 'graph':
                     # Disable the test for 'graph' when it's identity.
@@ -901,13 +957,14 @@ def test_forward_sequence_mask():
     verify((5, 4, 3), False, 1.0, 1, 'float64', 'float64')
     verify((5, 4, 3, 2), True, 1.0, 0, 'float32', 'float32')
 
+@tvm.testing.uses_gpu
 def test_forward_contrib_div_sqrt_dim():
     def verify(shape):
         x_np = np.random.uniform(size=shape).astype("float32")
         ref_res = mx.nd.contrib.div_sqrt_dim(mx.nd.array(x_np))
         mx_sym = mx.sym.contrib.div_sqrt_dim(mx.sym.var("x"))
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x_np)
@@ -915,6 +972,7 @@ def test_forward_contrib_div_sqrt_dim():
     verify((3, 4))
     verify((3, 4, 5))
 
+@tvm.testing.uses_gpu
 def test_forward_batch_norm():
     def verify(shape, axis=1, fix_gamma=False):
         x = np.random.uniform(size=shape).astype("float32")
@@ -934,7 +992,7 @@ def test_forward_batch_norm():
                       "mean": moving_mean.shape, "var": moving_var.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
         #print(mod)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x, gamma, beta, moving_mean, moving_var)
@@ -945,6 +1003,7 @@ def test_forward_batch_norm():
     verify((2, 3, 4, 5), fix_gamma=True)
 
 
+@tvm.testing.uses_gpu
 def test_forward_instance_norm():
     def verify(shape, axis=1, epsilon=1e-5):
         x = np.random.uniform(size=shape).astype("float32")
@@ -954,7 +1013,7 @@ def test_forward_instance_norm():
         mx_sym = mx.sym.InstanceNorm(mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), epsilon)
         shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x, gamma, beta)
@@ -965,6 +1024,7 @@ def test_forward_instance_norm():
     verify((8, 7, 6, 5, 4))
 
 
+@tvm.testing.uses_gpu
 def test_forward_layer_norm():
     def verify(shape, axis=-1):
         x = np.random.uniform(size=shape).astype("float32")
@@ -976,7 +1036,7 @@ def test_forward_layer_norm():
                                   mx.sym.var("beta"), axis=axis)
         shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x, gamma, beta)
@@ -985,6 +1045,7 @@ def test_forward_layer_norm():
     verify((2, 5), axis=0)
     verify((2, 5, 6))
 
+@tvm.testing.uses_gpu
 def test_forward_one_hot():
     def verify(indices_shape, depth, on_value, off_value, dtype):
         x = np.random.randint(0, 5, size=indices_shape)
@@ -992,7 +1053,7 @@ def test_forward_one_hot():
         mx_sym = mx.sym.one_hot(mx.sym.var("x"), depth, on_value, off_value, dtype)
         shape_dict = {"x": x.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x.astype("float32"))
@@ -1004,6 +1065,7 @@ def test_forward_one_hot():
     verify((3, 2, 4, 5), 6, 1, 0, "int32")
     verify((3, 2, 4, 5), 6, 1.0, 0.0, "float32")
 
+@tvm.testing.uses_gpu
 def test_forward_pad():
     def verify(data_shape, out_shape, mode, pad_width, constant_value=0.0):
         data = mx.sym.var('data')
@@ -1028,6 +1090,7 @@ def test_forward_pad():
            pad_width=(0,0,0,0,1,2,3,4,5,6))
 
 
+@tvm.testing.uses_gpu
 def test_forward_slice():
     def verify(data_shape, out_shape, begin, end):
         data = mx.sym.var('data')
@@ -1038,6 +1101,7 @@ def test_forward_slice():
     verify(data_shape=(1,1,10), out_shape=(1,1,8), begin=(None, None, 2), end=(None, None, None))
 
 
+@tvm.testing.uses_gpu
 def test_forward_convolution():
     def verify(data_shape, kernel_size, stride, pad, num_filter, is_depthwise=False):
         if is_depthwise:
@@ -1057,7 +1121,7 @@ def test_forward_convolution():
                                     pad=pad, num_filter=num_filter, num_group=groups)
         shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x, weight, bias)
@@ -1078,6 +1142,7 @@ def test_forward_convolution():
     verify(data_shape=(1, 8, 16, 16, 16), kernel_size=(3, 3, 3), stride=(2, 2, 2), pad=(1, 1, 1), num_filter=2)
     verify(data_shape=(20, 8, 16, 16, 16), kernel_size=(3, 3, 3), stride=(1, 1, 1), pad=(1, 1, 1), num_filter=2)
 
+@tvm.testing.uses_gpu
 def test_forward_deconvolution():
     def verify(data_shape, kernel_size, stride, pad, num_filter):
         weight_shape=(data_shape[1], num_filter) + kernel_size
@@ -1092,7 +1157,7 @@ def test_forward_deconvolution():
                                       pad=pad, num_filter=num_filter, no_bias=False)
         shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x, weight, bias)
@@ -1107,6 +1172,7 @@ def test_forward_deconvolution():
     verify(data_shape=(1, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
     verify(data_shape=(20, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
 
+@tvm.testing.uses_gpu
 def test_forward_cond():
     def verify(a_np, b_np):
         a_nd, b_nd = mx.nd.array(a_np), mx.nd.array(b_np)
@@ -1123,7 +1189,7 @@ def test_forward_cond():
 
         shape_dict = {"a": a_np.shape, "b": b_np.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["debug", "vm"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(a_np, b_np)
@@ -1132,6 +1198,7 @@ def test_forward_cond():
     verify(np.asarray([1.0], 'float32'), np.asarray([2.0],'float32'))
     verify(np.asarray([4.0], 'float32'), np.asarray([3.0],'float32'))
 
+@tvm.testing.uses_gpu
 def test_forward_amp_cast():
     def verify(from_dtype, to_dtype):
         from_np = np.random.uniform(size=(1,3,18)).astype(from_dtype)
@@ -1140,7 +1207,7 @@ def test_forward_amp_cast():
         shape_dict = {'x': (1,3,18)}
         dtype_dict = {'x': from_dtype}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "vm", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(from_np)
@@ -1150,6 +1217,7 @@ def test_forward_amp_cast():
     verify('float32', 'float16')
     verify('float16', 'float32')
 
+@tvm.testing.uses_gpu
 def test_forward_amp_multicast():
     def verify(dtypes, cast_narrow, expected_dtype):
         x_nps = [np.random.uniform(size=(1,3,18)).astype(dtype) for dtype in dtypes]
@@ -1162,7 +1230,7 @@ def test_forward_amp_multicast():
             shape_dict[str(i)] = (1,3,18)
             dtype_dict[str(i)] = dtype
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "vm", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(*x_nps)
@@ -1178,6 +1246,7 @@ def test_forward_amp_multicast():
     verify(['float16', 'float16'], True, 'float16')
 
 
+@tvm.testing.uses_gpu
 def test_forward_unravel_index():
     def verify(x, shape, dtype):
         a_np = np.array(x).astype(dtype)
@@ -1186,7 +1255,7 @@ def test_forward_unravel_index():
         shapes = {'a': a_np.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "vm", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(a_np)
@@ -1204,6 +1273,7 @@ def test_forward_unravel_index():
     # verify([0, 1, 2, 5], [2, 2], dtype)
 
 
+@tvm.testing.uses_gpu
 def test_forward_swap_axis():
     def _verify_swap_axis(in_shape, out_shape, dim1, dim2):
         data = mx.sym.var('data')
@@ -1216,6 +1286,7 @@ def test_forward_swap_axis():
     # _verify_swap_axis((4, 5), (5, 4), 0, 0)
 
 
+@tvm.testing.uses_gpu
 def test_forward_depth_to_space():
     def verify(shape, blocksize=2):
         x = np.random.uniform(size=shape).astype("float32")
@@ -1223,7 +1294,7 @@ def test_forward_depth_to_space():
         mx_sym = mx.sym.depth_to_space(mx.sym.var("x"), blocksize)
         shape_dict = {"x": x.shape, }
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x)
@@ -1232,6 +1303,7 @@ def test_forward_depth_to_space():
     verify((1, 18, 3, 3), 3)
 
 
+@tvm.testing.uses_gpu
 def test_forward_space_to_depth():
     def verify(shape, blocksize=2):
         x = np.random.uniform(size=shape).astype("float32")
@@ -1239,7 +1311,7 @@ def test_forward_space_to_depth():
         mx_sym = mx.sym.space_to_depth(mx.sym.var("x"), blocksize)
         shape_dict = {"x": x.shape, }
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(x)
@@ -1248,6 +1320,7 @@ def test_forward_space_to_depth():
     verify((1, 1, 9, 9), 3)
 
 
+@tvm.testing.uses_gpu
 def test_forward_correlation():
     def verify(data_shape, kernel_size, max_displacement, stride1, stride2, pad_size,
                is_multiply):
@@ -1263,7 +1336,7 @@ def test_forward_correlation():
                                     is_multiply=is_multiply)
         shape_dict = {"data1": data1.shape, "data2": data2.shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(data1, data2)
@@ -1280,6 +1353,7 @@ def test_forward_correlation():
     verify((5, 1, 11, 11), kernel_size = 5, max_displacement = 1, stride1 = 1, stride2 = 1, pad_size = 2, is_multiply = False)
 
 
+@tvm.testing.uses_gpu
 def test_forward_arange_like():
     def verify(data_shape, start=None, step=None, axis=None):
         attrs = {}
@@ -1295,7 +1369,7 @@ def test_forward_arange_like():
         
         mx_sym = mx.sym.contrib.arange_like(data, **attrs)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()()
@@ -1307,6 +1381,7 @@ def test_forward_arange_like():
     verify(data_shape=(3, 4, 5), start=2., step=3., axis=1)
 
 
+@tvm.testing.uses_gpu
 def test_forward_interleaved_matmul_selfatt_qk():
     def verify(batch, seq_length, num_heads, head_dim):
         data_shape = (seq_length, batch, num_heads * head_dim * 3)
@@ -1317,7 +1392,7 @@ def test_forward_interleaved_matmul_selfatt_qk():
 
         mx_sym = mx.sym.contrib.interleaved_matmul_selfatt_qk(data, heads=num_heads)
         mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(data_np)
@@ -1327,6 +1402,7 @@ def test_forward_interleaved_matmul_selfatt_qk():
     verify(3, 10, 6, 8)
 
 
+@tvm.testing.uses_gpu
 def test_forward_interleaved_matmul_selfatt_valatt():
     def verify(batch, seq_length, num_heads, head_dim):
         data_shape = (seq_length, batch, num_heads * head_dim * 3)
@@ -1342,7 +1418,7 @@ def test_forward_interleaved_matmul_selfatt_valatt():
             data, weight, heads=num_heads)
         mod, _ = relay.frontend.from_mxnet(
             mx_sym, {"data": data_shape, "weight": weight_shape})
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(data=data_np, weight=weight_np)
@@ -1352,6 +1428,7 @@ def test_forward_interleaved_matmul_selfatt_valatt():
     verify(3, 10, 6, 8)
 
 
+@tvm.testing.uses_gpu
 def test_forward_box_decode():
     def verify(data_shape, anchor_shape, stds=[1, 1, 1, 1], clip=-1, in_format="corner"):
         dtype = "float32"
@@ -1361,7 +1438,7 @@ def test_forward_box_decode():
         mx_sym = mx.sym.contrib.box_decode(mx.sym.var("data"), mx.sym.var("anchors"), stds[0], stds[1], stds[2], stds[3], clip, in_format)
         shape_dict = {"data": data_shape, "anchors": anchor_shape}
         mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 op_res = intrp.evaluate()(data, anchors)
@@ -1374,6 +1451,7 @@ def test_forward_box_decode():
     verify((1, 10, 4), (1, 10, 4), in_format="center")
 
 
+@tvm.testing.uses_gpu
 def test_forward_softmax():
     def verify(data_shape, axis, use_length, length):
         dtype = "float32"
@@ -1394,7 +1472,7 @@ def test_forward_softmax():
             shape_dict = {"data": data_shape}
             mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
                 if use_length:
@@ -1419,7 +1497,7 @@ def test_forward_softmax():
 @pytest.mark.parametrize("mode", ["constant", "edge", "reflect"])
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
 @pytest.mark.parametrize("constant_value", [0.0, 3.0])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_npi_pad(data_shape, pad_width, mode, dtype, constant_value,target, ctx, kind):
     data_np = np.random.uniform(size=data_shape).astype(dtype)
@@ -1435,12 +1513,12 @@ def test_forward_npi_pad(data_shape, pad_width, mode, dtype, constant_value,targ
     op_res = intrp.evaluate()(data_np)
     tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy(), rtol=1e-5)
 
-    
+
 @pytest.mark.skipif(not hasattr(mx.sym.np, 'pad'), reason="test will abort with MXNet 1.x, skip for now")
 @pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2)])
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
 @pytest.mark.parametrize("axes", [(1,0,2),None])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_npi_transpose(data_shape, axes, dtype,target, ctx, kind):
     data_np = np.random.uniform(size=data_shape).astype(dtype)
@@ -1458,7 +1536,7 @@ def test_forward_npi_transpose(data_shape, axes, dtype,target, ctx, kind):
     [((2,2),(2,2),1),((2,4),(2,3),1),((1,3,2),(1,3,5),2),((1,3,3),(1,3,3),1),((1,3),(1,3),0)]
 )
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_npi_concatenate(data_shape1, data_shape2, axis, dtype,target, ctx, kind):
     data_np1 = np.random.uniform(size=data_shape1).astype(dtype)
@@ -1475,7 +1553,7 @@ def test_forward_npi_concatenate(data_shape1, data_shape2, axis, dtype,target, c
 
 @pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8)])
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_np_copy(data_shape,dtype,target, ctx, kind):
     data_np = np.random.uniform(size=data_shape).astype(dtype)
@@ -1489,7 +1567,7 @@ def test_forward_np_copy(data_shape,dtype,target, ctx, kind):
 
 
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 @pytest.mark.parametrize("data_shape,out_shape,reverse",
                          [((2, 3, 8),(-2, -2, 2, -1),False),
@@ -1510,7 +1588,7 @@ def test_forward_npx_reshape(data_shape,out_shape,dtype,target,reverse, ctx, kin
 
 @pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8),(2,2),(1,3)])
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_npi_binary(data_shape,dtype,target, ctx, kind):
     ref_ops = [mx.np.power, mx.np.multiply, mx.np.add, mx.np.less]
@@ -1535,7 +1613,7 @@ def test_forward_npi_binary(data_shape,dtype,target, ctx, kind):
 
 @pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8),(2,2),(1,3)])
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("scalar", [1.0,2.0,3.0,4.0])
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_npi_binary_scalar(data_shape,dtype,scalar,target, ctx, kind):
@@ -1559,7 +1637,7 @@ def test_forward_npi_binary_scalar(data_shape,dtype,scalar,target, ctx, kind):
 
 @pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8),(2,2),(1,3)])
 @pytest.mark.parametrize("dtype", ['float64', 'float32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_npi_tanh(data_shape,dtype,target, ctx, kind):
     data_np1 = np.random.uniform(size=data_shape).astype(dtype)
@@ -1577,7 +1655,7 @@ def test_forward_npi_tanh(data_shape,dtype,target, ctx, kind):
 @pytest.mark.parametrize("data_dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
 @pytest.mark.parametrize("cond_dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
 @pytest.mark.parametrize("scalar", [1.0,2.0])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 def test_forward_npi_where_rscalar(data_shape,cond_dtype,data_dtype,scalar,target, ctx, kind):
     if data_dtype == 'bool':
@@ -1600,7 +1678,7 @@ def test_forward_npi_where_rscalar(data_shape,cond_dtype,data_dtype,scalar,targe
 
 
 @pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
 @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
 @pytest.mark.parametrize("data_shape, axis, indices_or_sections, squeeze_axis", 
                          [((3,2,1),1,2,False),((3,2,1),0,3,False),((3,2,1),0,3,True),((3,2,1),0,(1,2),False)])
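
For the pytest-parametrized npi_* tests above, the explicit @pytest.mark.parametrize("target, ctx", ctx_list()) line is replaced by @tvm.testing.parametrize_targets, which supplies the target and ctx arguments and stacks with the other parametrize decorators. A sketch of that decorator-based form, with illustrative dtype values and a trivial body standing in for the real relay check:

    import numpy as np
    import pytest
    import tvm
    import tvm.testing

    @pytest.mark.parametrize("dtype", ["float32", "int32"])
    @tvm.testing.parametrize_targets
    @pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
    def test_example(dtype, target, ctx, kind):
        # one test instance per enabled target, dtype, and executor kind
        data = np.zeros((2, 2), dtype=dtype)   # stand-in for building a module and comparing outputs
        assert data.shape == (2, 2)
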
index c09580e..5921c0d 100644
@@ -26,8 +26,8 @@ import tvm
 from tvm import te
 from tvm import relay
 from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
 import scipy
+import tvm.testing
 
 
 def get_input_data_shape_dict(graph_def, input_data):
@@ -117,11 +117,12 @@ def verify_onnx_forward_impl(graph_file, data_shape, out_shape):
     x = np.random.uniform(size=data_shape)
     model = onnx.load_model(graph_file)
     c2_out = get_onnxruntime_output(model, x, dtype)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, x, target, ctx, out_shape, dtype)
         tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_reshape():
     in_shape = (4, 3, 3, 4)
     ref_shape = (6, 2, 4, 3)
@@ -145,13 +146,14 @@ def test_reshape():
 
     model = helper.make_model(graph, producer_name='reshape_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=in_shape).astype('int32')
         tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
 
     tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
 
 
+@tvm.testing.uses_gpu
 def test_expand():
 
     def _test_expand(name, data, shape, ref_data):
@@ -174,7 +176,7 @@ def test_expand():
 
         model = helper.make_model(graph, producer_name=name)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             tvm_out = get_tvm_output(model, data, target, ctx, ref_data.shape, 'float32')
 
         tvm.testing.assert_allclose(ref_data, tvm_out)
@@ -205,13 +207,14 @@ def verify_depth_to_space(inshape, outshape, mode, blockSize):
 
     model = helper.make_model(graph, producer_name='depth_to_space_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=inshape).astype('float32')
         tvm_out = get_tvm_output(model, x, target, ctx, outshape, 'float32')
         onnx_out = get_onnxruntime_output(model, x, 'float32')
         tvm.testing.assert_allclose(onnx_out, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_depth_to_space():
     # current onnx.checker uses the OpSet-1 version of DepthToSpace, which doesn't have a mode argument.
     # TODO: add a mode argument to test CRD mode and DCR mode
@@ -232,17 +235,19 @@ def verify_space_to_depth(inshape, outshape, blockSize):
 
     model = helper.make_model(graph, producer_name='space_to_depth_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=inshape).astype('float32')
         tvm_out = get_tvm_output(model, x, target, ctx, outshape, 'float32')
         onnx_out = get_onnxruntime_output(model, x, 'float32')
         tvm.testing.assert_allclose(onnx_out, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_space_to_depth():
     verify_space_to_depth((1, 1, 4, 6), (1, 4, 2, 3), 2)
 
 
+@tvm.testing.uses_gpu
 def test_shape():
     in_shape = (4, 3, 3, 4)
     ref_shape = (6, 2, 4, 3)
@@ -268,7 +273,7 @@ def test_shape():
 
     model = helper.make_model(graph, producer_name='shape_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=in_shape).astype('int32')
         tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'int32')
 
@@ -297,17 +302,19 @@ def _test_power_iteration(x_shape, y_shape):
 
     model = helper.make_model(graph, producer_name='power_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [x, y], target, ctx, np_res.shape)
         tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_power():
     _test_power_iteration((1, 3), (1))
     _test_power_iteration((2, 3), (2, 3))
     _test_power_iteration((2, 3), (1, 3))
 
 
+@tvm.testing.uses_gpu
 def test_squeeze():
     in_shape = (1, 3, 1, 3, 1, 1)
     out_shape = (3, 3)
@@ -322,13 +329,14 @@ def test_squeeze():
 
     model = helper.make_model(graph, producer_name='squeeze_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=in_shape).astype('float32')
         tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
 
     tvm.testing.assert_allclose(out_shape, tvm_out.shape)
 
 
+@tvm.testing.uses_gpu
 def test_flatten():
 
     in_shape = (1, 3, 4, 4)
@@ -346,13 +354,14 @@ def test_flatten():
 
     model = helper.make_model(graph, producer_name='flatten_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=in_shape).astype('int32')
         tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
 
     tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
 
 
+@tvm.testing.uses_gpu
 def test_unsqueeze():
     in_shape = (3, 3)
     axis = (0, 3, 4)
@@ -368,7 +377,7 @@ def test_unsqueeze():
 
     model = helper.make_model(graph, producer_name='squeeze_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=in_shape).astype('float32')
         tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
 
@@ -392,12 +401,13 @@ def verify_gather(in_shape, indices, axis, dtype):
                                                                      TensorProto.FLOAT, list(out_np.shape))])
     model = helper.make_model(graph, producer_name='gather_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [x, indices], target, ctx, out_np.shape)
         tvm.testing.assert_allclose(out_np, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_gather():
     verify_gather((4,), [1], 0, 'int32')
     verify_gather((1, 4), [0], 0, 'int32')
@@ -427,12 +437,13 @@ def verify_scatter(in_shape, indices, axis):
     model = helper.make_model(graph, producer_name='scatter_test')
     onnx_out = get_onnxruntime_output(model, [x, indices, updates])
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [x, indices, updates], target, ctx, onnx_out[0].shape)
         tvm.testing.assert_allclose(onnx_out[0], tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_scatter():
     verify_scatter((4,), [1], 0)
     verify_scatter((1, 4), [[0]], 0)
@@ -459,7 +470,7 @@ def _test_slice_iteration_v1(indata, outdata, starts, ends, axes=None):
 
     model = helper.make_model(graph, producer_name='slice_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, indata, target, ctx, outdata.shape, 'float32', opset=1)
 
@@ -547,7 +558,7 @@ def _test_slice_iteration_v10(indata, outdata, **attrs):
                               initializer=initializer)
     model = helper.make_model(graph, producer_name='slice_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model,
                                  indata,
                                  target,
@@ -559,6 +570,7 @@ def _test_slice_iteration_v10(indata, outdata, **attrs):
     tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_slice():
     x = np.random.randn(20, 10, 5).astype(np.float32)
     _test_slice_iteration_v1(x, x[0:3, 0:10], starts=(0, 0), ends=(3, 10), axes=(0, 1))
@@ -595,22 +607,25 @@ def _test_onnx_op_elementwise(inshape, outfunc, npargs, dtype, opname, kwargs):
 
     model = helper.make_model(graph, producer_name=opname+'_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, indata, target, ctx, outdata.shape, dtype)
 
     tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_floor():
     _test_onnx_op_elementwise((2, 4, 5, 6), np.floor,
                               {}, 'float32', 'Floor', {})
 
 
+@tvm.testing.uses_gpu
 def test_ceil():
     _test_onnx_op_elementwise((2, 4, 5, 6), np.ceil, {}, 'float32', 'Ceil', {})
 
 
+@tvm.testing.uses_gpu
 def test_clip():
     _test_onnx_op_elementwise((2, 4, 5, 6),
                               np.clip,
@@ -620,7 +635,7 @@ def test_clip():
                               {'min': -1.0, 'max': 1.0})
 
 
-
+@tvm.testing.uses_gpu
 def test_round():
     _test_onnx_op_elementwise((2, 4, 5, 6), np.round, {}, 'float32', 'Round', {})
 
@@ -640,17 +655,19 @@ def _test_finite_ops(inshape, outfunc, npargs, dtype, opname, kwargs):
 
     model = helper.make_model(graph, producer_name=opname+'_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, indata, target, ctx, outdata.shape, dtype)
 
     tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_isinf():
     _test_finite_ops((2, 4, 5, 6), np.isinf, {}, 'float32', 'IsInf', {})
 
 
+@tvm.testing.uses_gpu
 def test_isnan():
     _test_finite_ops((2, 4, 5, 6), np.isnan, {}, 'float32', 'IsNaN', {})
 
@@ -672,18 +689,20 @@ def verify_gather_nd(in_shape, indices, dtype):
                                                                      TensorProto.FLOAT, list(out_np.shape))])
     model = helper.make_model(graph, producer_name='gather_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [x, indices], target, ctx, out_np.shape)
         tvm.testing.assert_allclose(out_np, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_gather_nd():
     verify_gather_nd((2, 2), [[0,0],[1,1]], 'int32')
     verify_gather_nd((3, 3, 3), [[0,1],[1,0]] , 'float32')
     verify_gather_nd((4, 3, 5, 6), [[2, 1, 0, 0]], 'float32')
 
 
+@tvm.testing.uses_gpu
 def test_onehot():
     indices_shape = [10]
     indices_array = np.random.randint(
@@ -709,12 +728,13 @@ def test_onehot():
 
     model = helper.make_model(graph, producer_name="onehot_test")
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [indices_array], target, ctx, out_np.shape)
         tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_matmul():
     a_shape = (4, 3)
     b_shape = (3, 4)
@@ -736,7 +756,7 @@ def test_matmul():
 
     model = helper.make_model(graph, producer_name='matmul_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [a_array, b_array], target, ctx, out_np.shape)
         tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
@@ -759,11 +779,12 @@ def verify_batch_matmul(a_shape, b_shape):
 
     model = helper.make_model(graph, producer_name='matmul_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [a_array, b_array], target, ctx, out_np.shape)
         tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_batch_matmul():
     verify_batch_matmul((2, 3, 4, 3), (2, 3, 3, 4))
     verify_batch_matmul((2, 4, 3), (3, 4))
@@ -800,7 +821,7 @@ def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None):
         py_out = in_array / ((bias + (alpha / nsize) * square_sum) ** beta)
         return py_out
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         input_name = model.graph.input[0].name
         py_out = _get_python_lrn()
         tvm_out = get_tvm_output(
@@ -808,6 +829,7 @@ def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None):
         tvm.testing.assert_allclose(py_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_lrn():
     verify_lrn((5, 5, 5, 5), 3, 'float32')
     verify_lrn((5, 5, 5, 5), 3, 'float32', alpha=0.0002, beta=0.5, bias=2.0)
@@ -845,12 +867,13 @@ def verify_instance_norm(shape, axis=1):
                                       helper.make_tensor_value_info("beta", TensorProto.FLOAT, (shape[1],))],
                               outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(shape))])
     model = helper.make_model(graph, producer_name='instance_norm_test')
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [x, gamma, beta], target, ctx, shape, 'float32')
         tvm.testing.assert_allclose(y, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_instance_norm():
     verify_instance_norm((2, 3, 4, 5))
     verify_instance_norm((32, 64, 80, 64))
@@ -877,7 +900,7 @@ def _test_upsample_nearest():
 
     model = helper.make_model(graph, producer_name='upsample_nearest_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, in_array, target, ctx, out_shape, 'float32')
         tvm.testing.assert_allclose(out_array, tvm_out)
@@ -902,7 +925,7 @@ def _test_upsample3d_nearest():
 
     model = helper.make_model(graph, producer_name='upsample_nearest_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, in_array, target, ctx, out_shape, 'float32')
         tvm.testing.assert_allclose(out_array, tvm_out)
@@ -926,7 +949,7 @@ def _test_upsample_bilinear():
 
     model = helper.make_model(graph, producer_name='upsample_bilinear_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, in_array, target, ctx, out_shape, 'float32')
         tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
@@ -961,7 +984,7 @@ def _test_upsample_bilinear_opset9():
     model = helper.make_model(
         graph, producer_name='upsample_bilinear_opset9_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, in_array, target, ctx, out_shape, 'float32')
         tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
@@ -995,11 +1018,12 @@ def _test_upsample3d_trilinear():
     model = helper.make_model(
         graph, producer_name='upsample_trilinear_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, in_array, target, ctx, out_shape, 'float32')
         tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_upsample():
     _test_upsample_nearest()
     _test_upsample_bilinear()
@@ -1026,12 +1050,13 @@ def _test_softmax(inshape, axis):
 
     model = helper.make_model(graph, producer_name=opname+'_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, indata, target, ctx, outshape, 'float32')
         tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_softmax():
     _test_softmax((1, 10), None)
     _test_softmax((1, 10), 1)
@@ -1061,12 +1086,13 @@ def verify_min(input_dim):
 
     model = helper.make_model(graph, producer_name='Min_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
         tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_min():
     verify_min((1, 3, 20, 20))
     verify_min((20, 20))
@@ -1096,12 +1122,13 @@ def verify_max(input_dim):
 
     model = helper.make_model(graph, producer_name='Max_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
         tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_max():
     verify_max((1, 3, 20, 20))
     verify_max((20, 20))
@@ -1131,12 +1158,13 @@ def verify_mean(input_dim):
 
     model = helper.make_model(graph, producer_name='Mean_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
         tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_mean():
     verify_mean((1, 3, 20, 20))
     verify_mean((20, 20))
@@ -1161,11 +1189,12 @@ def verify_hardsigmoid(input_dim, alpha, beta):
 
     model = helper.make_model(graph, producer_name='HardSigmoid_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape)
         tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_hardsigmoid():
     verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6)
     verify_hardsigmoid((20, 20), 0.3, 0.4)
@@ -1212,7 +1241,7 @@ def verify_argmin(input_dim, axis=None, keepdims=None):
 
     model = helper.make_model(graph, producer_name='argmin_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
         tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
@@ -1260,12 +1289,13 @@ def verify_argmax(input_dim, axis=None, keepdims=None):
 
     model = helper.make_model(graph, producer_name='argmax_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
         tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_forward_arg_min_max():
     '''Verify argmin and argmax'''
     verify_argmin([3, 4, 4])
@@ -1309,12 +1339,13 @@ def verify_constantofshape(input_dim, value, dtype):
 
     model = helper.make_model(graph, producer_name='fill_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [], target, ctx, out.shape)
 
         tvm.testing.assert_allclose(out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_constantofshape():
     verify_constantofshape((2, 3, 4, 5), 10, 'float32')
     verify_constantofshape((3, 3), 0, 'int32')
@@ -1355,7 +1386,7 @@ def verify_pad(indata, pads, mode='constant', value=0.0):
                                                                      TensorProto.FLOAT, list(outdata.shape))])
     model = helper.make_model(graph, producer_name='pad_test')
     #  tvm result
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, indata, target, ctx, outdata.shape, 'float32', opset=2)
     tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
@@ -1411,12 +1442,13 @@ def verify_pad_v11(indata, pads, mode='constant', value=0.0):
                                                                          TensorProto.FLOAT, list(outdata.shape))])
     model = helper.make_model(graph, producer_name='pad_test')
     #  tvm result
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, inputs, target, ctx, outdata.shape, 'float32', opset=11)
     tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_pad():
     verify_pad(np.random.randn(2, 2).astype(
         np.float32), [0, 1, 0, 0], 'constant', 0.0)
@@ -1465,10 +1497,11 @@ def verify_reduce_func(func, data, axis, keepdims):
     model = helper.make_model(graph, producer_name='reduce_test')
 
     onnx_out = get_onnxruntime_output(model, data, 'float32')
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, data, target, ctx, outshape, 'float32')
         tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_all_reduce_funcs():
     funcs = ["ReduceMax",
              "ReduceMean",
@@ -1532,7 +1565,7 @@ def verify_split(indata, outdatas, split, axis=0):
                                        ])
     model = helper.make_model(graph, producer_name='split_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         output_shape = [o.shape for o in outdatas]
         output_type = ['float32', 'float32', 'float32']
         tvm_out = get_tvm_output(
@@ -1541,6 +1574,7 @@ def verify_split(indata, outdatas, split, axis=0):
         tvm.testing.assert_allclose(o, t)
 
 
+@tvm.testing.uses_gpu
 def test_split():
     # 1D
     verify_split([1., 2., 3., 4., 5., 6.], [
@@ -1554,6 +1588,7 @@ def test_split():
     verify_split([1, 2, 3], [[1], [2], [3]], False)
 
 
+@tvm.testing.uses_gpu
 def test_binary_ops():
     in_shape = (1, 2, 3, 3)
     dtype = "float32"
@@ -1573,7 +1608,7 @@ def test_binary_ops():
                                   outputs=[helper.make_tensor_value_info("out",
                                                                          TensorProto.FLOAT, list(out_shape))])
         model = helper.make_model(graph, producer_name='_test')
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             tvm_out = get_tvm_output(model, [x, y], target, ctx)
             tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
 
@@ -1595,6 +1630,7 @@ def test_binary_ops():
     verify_binary_ops("Equal", x, y, x == y, broadcast=True)
 
 
+@tvm.testing.uses_gpu
 def test_single_ops():
     in_shape = (1, 2, 3, 3)
     dtype = "float32"
@@ -1609,7 +1645,7 @@ def test_single_ops():
                                   outputs=[helper.make_tensor_value_info("out",
                                                                          TensorProto.FLOAT, list(out_shape))])
         model = helper.make_model(graph, producer_name='_test')
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             tvm_out = get_tvm_output(model, [x], target, ctx)
             tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol)
 
@@ -1639,6 +1675,7 @@ def test_single_ops():
     verify_single_ops("SoftPlus", x, np.log(1 + np.exp(x)))
 
 
+@tvm.testing.uses_gpu
 def test_leaky_relu():
     def leaky_relu_x(x, alpha):
         return np.where(x >= 0, x, x * alpha)
@@ -1650,6 +1687,7 @@ def test_leaky_relu():
                               {'alpha': 0.25})
 
 
+@tvm.testing.uses_gpu
 def test_elu():
     def elu_x(x, alpha):
         return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
@@ -1661,6 +1699,7 @@ def test_elu():
                               {'alpha': 0.25})
 
 
+@tvm.testing.uses_gpu
 def test_selu():
     def selu_x(x, alpha, gamma):
         return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
@@ -1672,6 +1711,7 @@ def test_selu():
                               {'alpha': 0.25, 'gamma': 0.3})
 
 
+@tvm.testing.uses_gpu
 def test_prelu():
     def verify_prelu(x_shape, a_shape):
         node = helper.make_node('PRelu',
@@ -1700,6 +1740,7 @@ def test_prelu():
     verify_prelu([2,12,16,16], [1, 12, 1, 1])
 
 
+@tvm.testing.uses_gpu
 def test_ThresholdedRelu():
     def ThresholdedRelu_x(x, alpha):
         out_np = np.clip(x, alpha, np.inf)
@@ -1713,6 +1754,7 @@ def test_ThresholdedRelu():
                               {'alpha': 0.25})
 
 
+@tvm.testing.uses_gpu
 def test_ScaledTanh():
     def ScaledTanh_x(x, alpha, beta):
         return alpha * np.tanh(beta * x)
@@ -1724,6 +1766,7 @@ def test_ScaledTanh():
                               {'alpha': 0.25, 'beta': 0.3})
 
 
+@tvm.testing.uses_gpu
 def test_ParametricSoftplus():
     def ParametricSoftplus_x(x, alpha, beta):
         return alpha * np.log(np.exp(beta * x) + 1)
@@ -1735,6 +1778,7 @@ def test_ParametricSoftplus():
                               {'alpha': 0.25, 'beta': 0.3})
 
 
+@tvm.testing.uses_gpu
 def test_Scale():
     def Scale_x(x, scale):
         return scale * x
@@ -1746,6 +1790,7 @@ def test_Scale():
                               {'scale': 0.25})
 
 
+@tvm.testing.uses_gpu
 def test_LogSoftmax():
     _test_onnx_op_elementwise((1, 4),
                               tvm.topi.testing.log_softmax_python,
@@ -1762,13 +1807,14 @@ def check_torch_conversion(model, input_size):
     torch.onnx.export(model(), dummy_input, file_name,
                       export_params=True, verbose=False)
     onnx_model = onnx.load(file_name)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         input_data = np.random.uniform(size=input_size).astype('int32')
         c2_out = get_onnxruntime_output(onnx_model, input_data)
         tvm_out = get_tvm_output(onnx_model, input_data, target, ctx)
         tvm.testing.assert_allclose(c2_out, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_resnet():
     check_torch_conversion(torchvision.models.resnet18, (1, 3, 224, 224))
     # check_torch_conversion(torchvision.models.resnet101, (1,3,224,224))
@@ -1787,10 +1833,12 @@ def test_resnet():
 #     check_torch_conversion(torchvision.models.squeezenet1_0, (1,3,224,224))
 
 
+@tvm.testing.uses_gpu
 def test_densenet():
     check_torch_conversion(torchvision.models.densenet161, (1, 3, 224, 224))
 
 
+@tvm.testing.uses_gpu
 def test_inception():
     check_torch_conversion(torchvision.models.inception_v3, (1, 3, 224, 224))
 
@@ -1803,6 +1851,7 @@ def test_inception():
 #     check_torch_conversion(torchvision.models.shufflenetv2, (1,3,224,224))
 
 
+@tvm.testing.uses_gpu
 def test_sign():
     def Sign_x(x):
         return np.sign(x)
@@ -1828,11 +1877,12 @@ def verify_not(indata, dtype):
 
     model = helper.make_model(graph, producer_name='not_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [x], target, ctx, outdata.shape)
         tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_not():
     # 2d
     verify_not(indata=(np.random.randn(3, 4) > 0), dtype=bool)
@@ -1857,11 +1907,12 @@ def verify_and(indata, dtype):
 
     model = helper.make_model(graph, producer_name='and_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [x, y], target, ctx, outdata.shape)
         tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_and():
     # 2d
     x = (np.random.randn(3, 4) > 0)
@@ -1899,7 +1950,7 @@ def verify_tile_v1(indata, outdata, **kwargs):
 
     model = helper.make_model(graph, producer_name='tile_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [indata], target, ctx, outdata.shape, opset=1)
         tvm.testing.assert_allclose(outdata, tvm_out)
@@ -1929,7 +1980,7 @@ def verify_tile_v6(indata, repeats, outdata):
 
     model = helper.make_model(graph, producer_name='tile_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [indata],
                                  target,
                                  ctx,
@@ -1938,6 +1989,7 @@ def verify_tile_v6(indata, repeats, outdata):
         tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_tile():
     x = np.random.rand(2, 3, 4, 5).astype(np.float32)
     repeats = np.random.randint(
@@ -1956,11 +2008,12 @@ def verify_erf(indata, outdata):
                               outputs=[helper.make_tensor_value_info('out', TensorProto.FLOAT, list(outdata.shape))])
     model = helper.make_model(graph, producer_name='erf_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [indata], target, ctx, outdata.shape)
         tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_erf():
     x = np.random.rand(2, 3, 4, 6).astype(np.float32)
     z = scipy.special.erf(x)
@@ -1977,11 +2030,12 @@ def verify_where(condition, x, y, dtype, outdata):
                               outputs=[helper.make_tensor_value_info('out', dtype, list(outdata.shape))])
     model = helper.make_model(graph, producer_name='where_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [condition, x, y], target, ctx, outdata.shape)
         tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_where():
     condition = np.array([[1, 0], [1, 1]], dtype=np.bool)
     x = np.array([[1, 2], [3, 4]], dtype=np.int64)
@@ -2031,11 +2085,12 @@ def verify_or(indata, dtype):
 
     model = helper.make_model(graph, producer_name='or_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(model, [x, y], target, ctx, outdata.shape)
         tvm.testing.assert_allclose(outdata, tvm_out)
 
 
+@tvm.testing.uses_gpu
 def test_or():
     # 2d
     x = (np.random.randn(3, 4) > 0)
@@ -2063,6 +2118,7 @@ def test_or():
     verify_or(indata=[x, y], dtype=bool)
 
 
+@tvm.testing.uses_gpu
 def test_batch_norm():
     def verify_batch_norm(in_shape):
         batchnorm = onnx.helper.make_node('BatchNormalization',
@@ -2087,7 +2143,7 @@ def test_batch_norm():
 
         model = helper.make_model(graph, producer_name='batchnorm_test')
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             x = np.random.uniform(size=in_shape).astype('float32')
             scale = np.random.uniform(size=in_shape[1]).astype('float32')
             b = np.random.uniform(size=in_shape[1]).astype('float32')
@@ -2104,6 +2160,7 @@ def test_batch_norm():
     verify_batch_norm([16, 16, 10, 10])
 
 
+@tvm.testing.uses_gpu
 def test_batch_norm_dynamic_subgraph():
     def verify_batch_norm_dynamic_subgraph(in_shape, o_shape):
         batchnorm = onnx.helper.make_node('BatchNormalization',
@@ -2132,7 +2189,7 @@ def test_batch_norm_dynamic_subgraph():
 
         model = helper.make_model(graph, producer_name='batchnorm_test')
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             x = np.random.uniform(size=in_shape).astype('float32')
             inp = np.random.uniform(size=o_shape).astype('float32')
             scale = np.random.uniform(size=in_shape[1]).astype('float32')
@@ -2186,7 +2243,7 @@ def verify_conv(x_shape, w_shape, y_shape, padding, kernel_shape, strides, dilat
 
     model = helper.make_model(graph, producer_name='conv_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=x_shape).astype('float32')
         W = np.random.uniform(size=w_shape).astype('float32')
         tvm_out = get_tvm_output(model, [x, W], target, ctx, y_shape)
@@ -2194,6 +2251,7 @@ def verify_conv(x_shape, w_shape, y_shape, padding, kernel_shape, strides, dilat
         tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_conv():
     def repeat(N, D):
         return tuple([N for _ in range(D)])
@@ -2276,7 +2334,7 @@ def verify_convtranspose(x_shape, w_shape, y_shape, p):
 
     model = helper.make_model(graph, producer_name='convtranspose_trest')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         x = np.random.uniform(size=x_shape).astype('float32')
         W = np.random.uniform(size=w_shape).astype('float32')
         tvm_out = get_tvm_output(model, [x, W], target, ctx, y_shape)
@@ -2284,6 +2342,7 @@ def verify_convtranspose(x_shape, w_shape, y_shape, p):
         tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_convtranspose():
     # Convolution Transpose with padding
     # (1, 1, 3, 3) input tensor
@@ -2293,6 +2352,7 @@ def test_convtranspose():
     verify_convtranspose((1, 1, 3, 3), (1, 2, 3, 3), (1, 2, 7, 3), [1, 2, 1, 2])
 
 
+@tvm.testing.uses_gpu
 def test_unsqueeze_constant():
     from torch.nn import Linear, Sequential, Module
     class Flatten(Module):
@@ -2343,13 +2403,14 @@ def verify_pooling(x_shape, kernel_shape, strides, pads, out_shape, mode, auto_p
 
     model = helper.make_model(graph, producer_name='pooling_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         onnx_out = get_onnxruntime_output(model, x_np, 'float32')
         tvm_out = get_tvm_output(
             model, [x_np], target, ctx, out_shape)
         tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_pooling():
     for mode in ['max', 'average']:
         # Pool1D
@@ -2440,12 +2501,13 @@ def verify_mod(x_shape, y_shape, fmod, out_shape, dtype='float32'):
 
     onnx_out = get_onnxruntime_output(model, [x_np, y_np], dtype)[0]
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [x_np, y_np], target, ctx, out_shape)
         tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_mod():
     # Mod
     verify_mod(x_shape=[1, 32, 32], y_shape=[1, 1, 32], fmod=0, out_shape=(1, 32, 32), dtype="int32")
@@ -2481,12 +2543,13 @@ def verify_xor(x_shape, y_shape):
                                                                     onnx_dtype, list(out_shape))])
     model = helper.make_model(graph, producer_name='xor_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [x_np, y_np], target, ctx, out_shape)
         tvm.testing.assert_allclose(np_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_xor():
     # XOR
     verify_xor(x_shape=[1, 32, 32], y_shape=[1, 32, 32])
@@ -2523,12 +2586,13 @@ def verify_max_roi_pool(x_shape, rois_shape, pooled_shape, spatial_scale, out_sh
     model = helper.make_model(graph, producer_name='pool_test')
 
     onnx_out = get_onnxruntime_output(model, [x_np, rois_np], 'float32')[0]
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         tvm_out = get_tvm_output(
             model, [x_np, rois_np], target, ctx, out_shape)
         tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_max_roi_pool():
     verify_max_roi_pool(x_shape=[1, 3, 6, 6],
                         rois_shape=[3, 5],
@@ -2572,13 +2636,14 @@ def verify_lppool(x_shape, kernel_shape, p, strides, pads, out_shape, auto_pad="
 
     model = helper.make_model(graph, producer_name='lppool_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         onnx_out = get_onnxruntime_output(model, x_np, 'float32')
         tvm_out = get_tvm_output(
             model, [x_np], target, ctx, out_shape)
         tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_lppool():
     # Pool1D
     verify_lppool(x_shape=[1, 1, 32], kernel_shape=[3], p=2, strides=[1], pads=[1, 1],
@@ -2728,7 +2793,7 @@ def verify_rnn(seq_length,
 
     model = helper.make_model(graph, producer_name='rnn_test')
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         onnx_out = get_onnxruntime_output(model, input_values, 'float32')
         tvm_out = get_tvm_output(
             model,
@@ -2741,6 +2806,7 @@ def verify_rnn(seq_length,
             tvm.testing.assert_allclose(o_out, t_out, rtol=5e-3, atol=5e-3)
 
 
+@tvm.testing.uses_gpu
 def test_lstm():
     # No bias.
     verify_rnn(
@@ -2845,6 +2911,7 @@ def test_lstm():
         rnn_type='LSTM')
 
 
+@tvm.testing.uses_gpu
 def test_gru():
     # No bias.
     verify_rnn(
@@ -2940,6 +3007,7 @@ def test_gru():
         rnn_type='GRU')
 
 
+@tvm.testing.uses_gpu
 def test_resize():
     def make_constant_node(name, data_type, dims, vals):
         return helper.make_node('Constant',
@@ -2977,7 +3045,7 @@ def test_resize():
 
         model = helper.make_model(graph, producer_name='resize_test')
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             x = np.random.uniform(size=ishape).astype('float32')
             onnx_out = get_onnxruntime_output(model, x, 'float32')
             tvm_out = get_tvm_output(model, x, target, ctx, oshape, 'float32', opset=11)
@@ -2997,6 +3065,7 @@ def test_resize():
     verify([1, 16, 32, 32], [], [1, 1, 0.5, 0.5], "linear", "half_pixel")
 
 
+@tvm.testing.uses_gpu
 def test_nonzero():
 
     def verify_nonzero(indata, outdata, dtype):
@@ -3025,6 +3094,7 @@ def test_nonzero():
     result = np.array((np.nonzero(input_data)))  # expected output [[0, 1, 2, 2], [0, 1, 0, 1]]
     verify_nonzero(input_data, result, dtype=np.int64)
 
+@tvm.testing.uses_gpu
 def test_topk():
     def verify_topk(input_dims, K, axis=-1):
         output_dims = list(input_dims)
@@ -3063,6 +3133,7 @@ def test_topk():
         verify_topk([n, n, n], 5, 2)
 
 
+@tvm.testing.uses_gpu
 def test_roi_align():
     def verify_roi_align(input_dims, num_roi, output_height, output_width, sampling_ratio=0, spatial_scale=1.0):
         output_dims = [num_roi, input_dims[1], output_height, output_width]
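
For orientation, here is a minimal sketch of the test pattern that the hunks above converge on, assuming `tvm.testing.enabled_targets()` yields only the (target, ctx) pairs actually enabled on the current node. It is illustrative only and not part of the patch: the test name and the trivial round-trip computation are made up, while `tvm.testing.uses_gpu`, `tvm.testing.enabled_targets`, and `tvm.testing.assert_allclose` are the helpers these hunks themselves use.

# Illustrative sketch, not part of the patch: a hypothetical test written
# against the helpers used throughout this diff. Real tests build a model
# for `target` and compare the result against a reference implementation.
import numpy as np
import tvm
import tvm.testing


@tvm.testing.uses_gpu
def test_example_roundtrip():
    data = np.random.uniform(size=(2, 3)).astype("float32")
    for target, ctx in tvm.testing.enabled_targets():
        # A trivial device round-trip stands in for compiling with `target`
        # and executing on `ctx`; disabled devices never appear in the loop,
        # so the same body runs on CPU-only and GPU test nodes.
        out = tvm.nd.array(data, ctx).asnumpy()
        tvm.testing.assert_allclose(data, out)
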
index 6840ca3..cfe9507 100644
@@ -28,7 +28,7 @@ import torchvision
 from tvm import relay
 from tvm.contrib import graph_runtime
 from tvm.contrib.nvcc import have_fp16
-from tvm.relay.testing.config import ctx_list
+import tvm.testing
 
 
 sys.setrecursionlimit(10000)
@@ -152,7 +152,6 @@ def measure_latency(model, input_shapes, output_shapes, thresh, dryruns=40):
 
 def verify_model(model_name, input_data=[],
                  custom_convert_map={},
-                 ctx_list=ctx_list(),
                  rtol=1e-5, atol=1e-5):
     """Assert that the output of a compiled model matches with that of its
     baseline."""
@@ -198,7 +197,7 @@ def verify_model(model_name, input_data=[],
                               [inp.cpu().numpy() for inp in baseline_input]))
 
     with tvm.transform.PassContext(opt_level=3):
-        for target, ctx in ctx_list:
+        for target, ctx in tvm.testing.enabled_targets():
             relay_graph, relay_lib, relay_params = relay.build(mod, target=target, params=params)
             relay_model = graph_runtime.create(relay_graph, relay_lib, ctx)
             relay_model.set_input(**relay_params)
@@ -218,6 +217,7 @@ def verify_model(model_name, input_data=[],
     torch.cuda.empty_cache()
 
 # Single operator tests
+@tvm.testing.uses_gpu
 def test_forward_add():
     torch.set_grad_enabled(False)
     input_shape = [10]
@@ -250,6 +250,7 @@ def test_forward_add():
     verify_model(Add3().float().eval(), input_data=input_data)
     verify_model(Add4().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_subtract():
     torch.set_grad_enabled(False)
     input_shape = [10]
@@ -282,6 +283,7 @@ def test_forward_subtract():
     verify_model(Subtract3().float().eval(), input_data=input_data)
     verify_model(Subtract4().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_multiply():
     torch.set_grad_enabled(False)
     input_shape = [10]
@@ -315,6 +317,7 @@ def test_forward_multiply():
     verify_model(Multiply4().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_min_max():
     class Max(Module):
         def forward(self, inp):
@@ -352,6 +355,7 @@ def test_min_max():
     verify_model(Min3(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_reciprocal():
     torch.set_grad_enabled(False)
     input_shape = [2, 1, 10, 1, 10]
@@ -362,6 +366,7 @@ def test_forward_reciprocal():
     input_data = torch.rand(input_shape).float()
     verify_model(Reciprocal1().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_repeat():
     torch.set_grad_enabled(False)
     input_shape = [1, 3]
@@ -382,6 +387,7 @@ def test_forward_repeat():
     verify_model(Repeat2().float().eval(), input_data=input_data)
     verify_model(Repeat3().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_repeat_interleave():
     torch.set_grad_enabled(False)
     input_shape = [2, 2, 3]
@@ -407,6 +413,7 @@ def test_forward_repeat_interleave():
     verify_model(RepeatInterleave3().float().eval(), input_data=input_data)
     verify_model(RepeatInterleave4().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_unsqueeze():
     torch.set_grad_enabled(False)
     input_shape = [10, 10]
@@ -418,6 +425,7 @@ def test_forward_unsqueeze():
     input_data = torch.rand(input_shape).float()
     verify_model(Unsqueeze1().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_squeeze():
     torch.set_grad_enabled(False)
     input_shape = [2, 1, 10, 1, 10]
@@ -434,6 +442,7 @@ def test_forward_squeeze():
     verify_model(Squeeze1().float().eval(), input_data=input_data)
     verify_model(Squeeze2().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_arange():
     torch.set_grad_enabled(False)
 
@@ -508,6 +517,7 @@ def test_forward_arange():
     verify_model(Arange11().float().eval())
     verify_model(Arange12().float().eval())
 
+@tvm.testing.uses_gpu
 def test_forward_mesh_grid():
     torch.set_grad_enabled(False)
 
@@ -528,6 +538,7 @@ def test_forward_mesh_grid():
     verify_model(MeshGrid1().float().eval())
     verify_model(MeshGrid2().float().eval())
 
+@tvm.testing.uses_gpu
 def test_forward_abs():
     torch.set_grad_enabled(False)
     input_shape = [2, 1, 10, 1, 10]
@@ -539,6 +550,7 @@ def test_forward_abs():
     input_data = torch.rand(input_shape).float()
     verify_model(Abs1().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_concatenate():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -558,18 +570,21 @@ def test_forward_concatenate():
     verify_model(Concatenate1().float().eval(), input_data=input_data)
     verify_model(Concatenate2().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_relu():
     torch.set_grad_enabled(False)
     input_shape = [10, 10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.ReLU().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_prelu():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.PReLU(num_parameters=3).eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_leakyrelu():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -579,6 +594,7 @@ def test_forward_leakyrelu():
     verify_model(torch.nn.LeakyReLU(negative_slope=1.0, inplace=True).eval(), input_data=input_data)
     verify_model(torch.nn.LeakyReLU(negative_slope=1.25, inplace=True).eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_elu():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -588,6 +604,7 @@ def test_forward_elu():
     verify_model(torch.nn.ELU(alpha=1.0).eval(), input_data=input_data)
     verify_model(torch.nn.ELU(alpha=1.3).eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_celu():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -597,18 +614,21 @@ def test_forward_celu():
     verify_model(torch.nn.CELU(alpha=1.0).eval(), input_data=input_data)
     verify_model(torch.nn.CELU(alpha=1.3).eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_gelu():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.GELU().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_selu():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.SELU().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_softplus():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -617,18 +637,21 @@ def test_forward_softplus():
     verify_model(torch.nn.Softplus(beta=1.5, threshold=20).eval(), input_data=input_data)
     verify_model(torch.nn.Softplus(beta=5, threshold=10).eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_softsign():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.Softsign().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_log_sigmoid():
     torch.set_grad_enabled(False)
     input_shape = [10, 10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.LogSigmoid().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_adaptiveavgpool():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -636,6 +659,7 @@ def test_forward_adaptiveavgpool():
     verify_model(torch.nn.AdaptiveAvgPool2d([1, 1]).eval(), input_data=input_data)
     verify_model(torch.nn.AdaptiveAvgPool2d([10, 10]).eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_maxpool2d():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -661,6 +685,7 @@ def test_forward_maxpool2d():
 
     verify_model(MaxPool2DWithIndices().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_maxpool1d():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10]
@@ -675,6 +700,7 @@ def test_forward_maxpool1d():
                                     stride=2).eval(),
                  input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_maxpool3d():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10, 10]
@@ -689,6 +715,7 @@ def test_forward_maxpool3d():
                                     stride=2).eval(),
                  input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_split():
     torch.set_grad_enabled(False)
     input_shape = [4, 10]
@@ -712,6 +739,7 @@ def test_forward_split():
     verify_model(Split([2, 3, 5], 1).float().eval(),
                  input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_avgpool():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -724,6 +752,7 @@ def test_forward_avgpool():
     verify_model(torch.nn.AvgPool2d(kernel_size=[10, 10]).eval(), input_data=input_data)
     verify_model(AvgPool2D2().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_avgpool3d():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10, 10]
@@ -736,12 +765,14 @@ def test_forward_avgpool3d():
     verify_model(torch.nn.AvgPool3d(kernel_size=[10, 10, 10]).eval(), input_data=input_data)
     verify_model(AvgPool3D1().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_hardtanh():
     torch.set_grad_enabled(False)
     input_shape = [10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.Hardtanh().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_conv():
     torch.set_grad_enabled(False)
     conv1d_input_shape = [1, 3, 10]
@@ -816,6 +847,7 @@ def test_forward_conv():
     verify_model(Conv1D2().float().eval(), input_data=conv1d_input_data)
     verify_model(Conv1D3().float().eval(), input_data=conv1d_input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_conv_transpose():
     torch.set_grad_enabled(False)
     conv2d_input_shape = [1, 3, 10, 10]
@@ -829,12 +861,14 @@ def test_forward_conv_transpose():
     verify_model(torch.nn.ConvTranspose1d(3, 12, 3, bias=False), input_data=conv1d_input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_threshold():
     torch.set_grad_enabled(False)
     input_shape = [1, 3]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.Threshold(0, 0).float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_contiguous():
     torch.set_grad_enabled(False)
     input_shape = [10]
@@ -847,6 +881,7 @@ def test_forward_contiguous():
     verify_model(Contiguous1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_batchnorm():
     def init_weight(m):
         torch.nn.init.normal_(m.weight, 0, 0.01)
@@ -861,6 +896,7 @@ def test_forward_batchnorm():
         verify_model(bn.eval(), input_data=inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_instancenorm():
     inp_2d = torch.rand((1, 16, 10, 10))
     inp_3d = torch.rand((1, 16, 10, 10, 10))
@@ -869,6 +905,7 @@ def test_forward_instancenorm():
                           (torch.nn.InstanceNorm3d(16), inp_3d)]:
         verify_model(ins_norm.eval(), input_data=inp)
 
+@tvm.testing.uses_gpu
 def test_forward_layernorm():
     def init_weight(m):
         torch.nn.init.normal_(m.weight, 0, 0.01)
@@ -882,6 +919,7 @@ def test_forward_layernorm():
         verify_model(ln.eval(), input_data=inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_groupnorm():
     input_shape = [10, 6, 5, 5]
     input_data = torch.rand(input_shape).float()
@@ -903,6 +941,7 @@ def test_forward_groupnorm():
     verify_model(torch.nn.GroupNorm(10, 10).eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_reshape():
     torch.set_grad_enabled(False)
     input_shape = [2, 1, 10, 1, 10]
@@ -920,6 +959,7 @@ def test_forward_reshape():
     verify_model(Reshape2().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_flatten():
     class Flatten(Module):
         def forward(self, x):
@@ -934,6 +974,7 @@ def test_flatten():
     verify_model(BatchFlatten(), input_data=inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_transpose():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -955,6 +996,7 @@ def test_forward_transpose():
     verify_model(Transpose2().float().eval(), input_data=input_data)
     verify_model(Transpose3().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_size():
     torch.set_grad_enabled(False)
     input_shape = [1, 3]
@@ -967,6 +1009,7 @@ def test_forward_size():
     verify_model(Size1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_type_as():
     torch.set_grad_enabled(False)
     input_shape = [1, 3]
@@ -1004,6 +1047,7 @@ def test_type_as():
             verify_model(_create_module(torch.float16), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_view():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1026,7 +1070,7 @@ def test_forward_view():
     verify_model(View2().float().eval(), input_data=input_data)
     verify_model(View3().float().eval(), input_data=input_data)
 
-
+@tvm.testing.uses_gpu
 def test_forward_select():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1055,6 +1099,7 @@ def test_forward_select():
     verify_model(IndexedSelect(x, 1).eval(), input_data=indices)
 
 
+@tvm.testing.uses_gpu
 def test_forward_clone():
     torch.set_grad_enabled(False)
     input_shape = [10]
@@ -1067,6 +1112,7 @@ def test_forward_clone():
     verify_model(Clone1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_gather():
     torch.set_grad_enabled(False)
 
@@ -1105,6 +1151,7 @@ def test_forward_gather():
     verify_model(Gather3().float().eval(), input_data=[input_data, index])
 
 
+@tvm.testing.uses_gpu
 def test_forward_logsoftmax():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1117,6 +1164,7 @@ def test_forward_logsoftmax():
     verify_model(LogSoftmax1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_norm():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1174,6 +1222,7 @@ def test_forward_norm():
     verify_model(Norm10().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_frobenius_norm():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1201,12 +1250,14 @@ def test_forward_frobenius_norm():
     verify_model(FroNorm4().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_sigmoid():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
     input_data = torch.rand(input_shape).float()
     verify_model(torch.nn.Sigmoid().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_dense():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1236,6 +1287,7 @@ def test_forward_dense():
     )
     assert not any([op.name == "multiply" for op in list_ops(mod['main'])])
 
+@tvm.testing.uses_gpu
 def test_forward_dropout():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1245,6 +1297,7 @@ def test_forward_dropout():
     verify_model(torch.nn.Dropout3d(p=0.5).eval(), input_data=input_data)
     verify_model(torch.nn.AlphaDropout(p=0.5).eval(), input_data=input_data[0, 0])
 
+@tvm.testing.uses_gpu
 def test_forward_slice():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1269,6 +1322,7 @@ def test_forward_slice():
     verify_model(Slice3().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_mean():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1280,6 +1334,7 @@ def test_forward_mean():
     input_data = torch.rand(input_shape).float()
     verify_model(Mean1().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_expand():
     torch.set_grad_enabled(False)
 
@@ -1300,6 +1355,7 @@ def test_forward_expand():
     verify_model(Expand2().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_pow():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1311,6 +1367,7 @@ def test_forward_pow():
     input_data = torch.rand(input_shape).float()
     verify_model(Pow1().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_chunk():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 14, 14]
@@ -1323,6 +1380,7 @@ def test_forward_chunk():
     input_data = torch.rand(input_shape).float()
     verify_model(Chunk1().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_upsample():
     class Upsample(Module):
         def __init__(self, size=None, scale=None,
@@ -1346,6 +1404,7 @@ def test_upsample():
     verify_model(Upsample(scale=2, mode="bilinear", align_corners=True), inp)
     verify_model(Upsample(size=(50, 50), mode="bilinear", align_corners=True), inp)
 
+@tvm.testing.uses_gpu
 def test_to():
     """ test for aten::to(...) """
     class ToCPU(Module):
@@ -1377,6 +1436,7 @@ def test_to():
     verify_model(ToDouble().eval(), torch.tensor(0.8))
 
 
+@tvm.testing.uses_gpu
 def test_adaptive_pool3d():
     for ishape in [(1, 32, 16, 16, 16),
                    (1, 32, 9, 15, 15),
@@ -1390,6 +1450,7 @@ def test_adaptive_pool3d():
         verify_model(torch.nn.AdaptiveMaxPool3d((7, 8, 9)).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_functional_pad():
     torch.set_grad_enabled(False)
     pad = (0, 0)
@@ -1408,12 +1469,14 @@ def test_forward_functional_pad():
     verify_model(Pad1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_zero_pad2d():
     inp = torch.rand((1, 1, 3, 3))
     verify_model(torch.nn.ZeroPad2d(2).eval(), inp)
     verify_model(torch.nn.ZeroPad2d((1, 1, 2, 0)).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_constant_pad1d():
     inp = torch.rand((1, 2, 4))
     verify_model(torch.nn.ConstantPad2d(2, 3.5).eval(), inp)
@@ -1422,18 +1485,21 @@ def test_forward_constant_pad1d():
     verify_model(torch.nn.ConstantPad2d((3, 1), 3.5).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_constant_pad2d():
     inp = torch.rand((1, 2, 2, 2))
     verify_model(torch.nn.ConstantPad2d(2, 3.5).eval(), inp)
     verify_model(torch.nn.ConstantPad2d((3, 0, 2, 1), 3.5).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_constant_pad3d():
     inp = torch.rand((1, 3, 2, 2, 2))
     verify_model(torch.nn.ConstantPad3d(3, 3.5).eval(), inp)
     verify_model(torch.nn.ConstantPad3d((3, 4, 5, 6, 0, 1), 3.5).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_reflection_pad1d():
     inp = torch.rand((1, 2, 4))
     verify_model(torch.nn.ReflectionPad1d(2).eval(), inp)
@@ -1443,6 +1509,7 @@ def test_forward_reflection_pad1d():
     verify_model(torch.nn.ReflectionPad1d((2, 3)).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_reflection_pad2d():
     inp = torch.rand((1, 1, 3, 3))
     verify_model(torch.nn.ReflectionPad2d(2).eval(), inp)
@@ -1452,6 +1519,7 @@ def test_forward_reflection_pad2d():
     verify_model(torch.nn.ReflectionPad2d((1, 3, 2, 4)).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_replication_pad1d():
     inp = torch.rand((1, 2, 4))
     verify_model(torch.nn.ReplicationPad1d(2).eval(), inp)
@@ -1461,6 +1529,7 @@ def test_forward_replication_pad1d():
     verify_model(torch.nn.ReplicationPad1d((2, 3)).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_replication_pad2d():
     inp = torch.rand((1, 1, 3, 3))
     verify_model(torch.nn.ReplicationPad2d(2).eval(), inp)
@@ -1470,6 +1539,7 @@ def test_forward_replication_pad2d():
     verify_model(torch.nn.ReplicationPad2d((1, 3, 2, 4)).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_replication_pad3d():
     inp = torch.rand((1, 1, 3, 3, 3))
     verify_model(torch.nn.ReplicationPad3d(3).eval(), inp)
@@ -1479,6 +1549,7 @@ def test_forward_replication_pad3d():
     verify_model(torch.nn.ReplicationPad3d((2, 3, 2, 5, 1, 4)).eval(), inp)
 
 
+@tvm.testing.uses_gpu
 def test_forward_upsample3d():
     inp = torch.arange(1, 9, dtype=torch.float32).view(1, 1, 2, 2, 2)
     verify_model(torch.nn.Upsample(scale_factor=2, mode='nearest').eval(), inp)
@@ -1511,6 +1582,7 @@ def test_forward_nms():
         verify_trace_model(NonMaxSupression(iou_thres), [in_boxes, in_scores])
 
 
+@tvm.testing.uses_gpu
 def test_conv3d():
     for ishape in [(1, 32, 16, 16, 16),
                    (1, 32, 9, 15, 15),
@@ -1529,6 +1601,7 @@ def test_conv3d():
                      inp)
 
 
+@tvm.testing.uses_gpu
 def test_conv3d_transpose():
     for ishape in [(1, 8, 10, 5, 10),
                    (1, 8, 5, 8, 8),
@@ -1557,53 +1630,65 @@ def test_conv3d_transpose():
 
 
 # Model tests
+@tvm.testing.uses_gpu
 def test_resnet18():
     torch.set_grad_enabled(False)
     verify_model("resnet18", atol=1e-4, rtol=1e-4)
 
+@tvm.testing.uses_gpu
 def test_squeezenet1_0():
     torch.set_grad_enabled(False)
     verify_model("squeezenet1_0", atol=1e-4, rtol=1e-4)
 
+@tvm.testing.uses_gpu
 def test_squeezenet1_1():
     torch.set_grad_enabled(False)
     verify_model("squeezenet1_1", atol=1e-4, rtol=1e-4)
 
+@tvm.testing.uses_gpu
 def test_densenet121():
     torch.set_grad_enabled(False)
     verify_model("densenet121", atol=1e-4, rtol=1e-4)
 
+@tvm.testing.uses_gpu
 def test_inception_v3():
     torch.set_grad_enabled(False)
     verify_model("inception_v3", atol=1e-4, rtol=1e-4)
 
+@tvm.testing.uses_gpu
 def test_googlenet():
     torch.set_grad_enabled(False)
     verify_model("googlenet", atol=1e-4, rtol=1e-4)
 
+@tvm.testing.uses_gpu
 def test_mnasnet0_5():
     torch.set_grad_enabled(False)
     verify_model("mnasnet0_5", atol=1e-4, rtol=1e-4)
 
+@tvm.testing.uses_gpu
 def test_mobilenet_v2():
     torch.set_grad_enabled(False)
     verify_model("mobilenet_v2", atol=1e-4, rtol=1e-4)
 
 """
 #TODO: Fix VGG and AlexNet issues (probably due to pooling)
+@tvm.testing.uses_gpu
 def test_alexnet():
     torch.set_grad_enabled(False)
     verify_model("alexnet")
 
+@tvm.testing.uses_gpu
 def test_vgg11():
     torch.set_grad_enabled(False)
     verify_model("vgg11")
 
+@tvm.testing.uses_gpu
 def test_vgg11_bn():
     torch.set_grad_enabled(False)
     verify_model("vgg11_bn")
 """
 
+@tvm.testing.uses_gpu
 def test_custom_conversion_map():
     def get_roi_align():
         pool_size = 5
@@ -1633,6 +1718,7 @@ def test_custom_conversion_map():
     verify_model(model, inputs, custom_map)
 
 
+@tvm.testing.uses_gpu
 def test_segmentaton_models():
     class SegmentationModelWrapper(Module):
         def __init__(self, model):
@@ -1652,6 +1738,7 @@ def test_segmentaton_models():
     verify_model(SegmentationModelWrapper(deeplab.eval()), inp, atol=1e-4, rtol=1e-4)
 
 
+@tvm.testing.uses_gpu
 def test_3d_models():
     input_shape = (1, 3, 4, 56, 56)
     resnet3d = torchvision.models.video.r3d_18(pretrained=True).eval()
@@ -1700,6 +1787,7 @@ def verify_model_vm(imodel, ishapes, idtype=torch.float, idata=None):
                                     rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_control_flow():
     class SimpleIf(torch.nn.Module):
         def __init__(self, N, M):
@@ -1813,6 +1901,7 @@ def test_control_flow():
         verify_script_model(pt_model.eval(), [(10, 20)])
 
 
+@tvm.testing.uses_gpu
 def test_simple_rnn():
     # The mixed tracing and scripting example from
     # https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html#mixing-scripting-and-tracing
@@ -1850,6 +1939,7 @@ def test_simple_rnn():
     verify_script_model(RNNLoop().eval(), [(10, 10, 4)])
 
 
+@tvm.testing.uses_gpu
 def test_forward_reduce_sum():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1882,6 +1972,7 @@ def test_forward_reduce_sum():
     verify_model(ReduceSum5().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_reduce_prod():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1904,6 +1995,7 @@ def test_forward_reduce_prod():
     verify_model(ReduceProd3().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_argmin():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1926,6 +2018,7 @@ def test_forward_argmin():
     verify_model(ArgMin3().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_argmax():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -1948,6 +2041,7 @@ def test_forward_argmax():
     verify_model(ArgMax3().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_std():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -2000,6 +2094,7 @@ def test_forward_std():
     verify_model(Std9().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_variance():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -2052,6 +2147,7 @@ def test_forward_variance():
     verify_model(Variance9().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_rsub():
     torch.set_grad_enabled(False)
 
@@ -2072,6 +2168,7 @@ def test_forward_rsub():
     verify_model(Rsub2().float().eval(), input_data=[d1, d3])
 
 
+@tvm.testing.uses_gpu
 def test_forward_embedding():
     torch.set_grad_enabled(False)
 
@@ -2085,6 +2182,7 @@ def test_forward_embedding():
     verify_model(torch.nn.Embedding(4, 5, sparse=True).float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_onehot():
     torch.set_grad_enabled(False)
 
@@ -2103,6 +2201,7 @@ def test_forward_onehot():
     verify_model(OneHot2().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_isfinite():
     torch.set_grad_enabled(False)
 
@@ -2114,6 +2213,7 @@ def test_forward_isfinite():
     verify_model(IsFinite1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_isnan():
     torch.set_grad_enabled(False)
 
@@ -2125,6 +2225,7 @@ def test_forward_isnan():
     verify_model(IsNan1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_isinf():
     torch.set_grad_enabled(False)
 
@@ -2136,6 +2237,7 @@ def test_forward_isinf():
     verify_model(IsInf1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_clamp():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -2158,6 +2260,7 @@ def test_forward_clamp():
     verify_model(Clamp3().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_ones():
     torch.set_grad_enabled(False)
 
@@ -2168,6 +2271,7 @@ def test_forward_ones():
     verify_model(Ones1().float().eval(), input_data=[])
 
 
+@tvm.testing.uses_gpu
 def test_forward_ones_like():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -2190,6 +2294,7 @@ def test_forward_ones_like():
     verify_model(OnesLike3().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_zeros():
     torch.set_grad_enabled(False)
 
@@ -2200,6 +2305,7 @@ def test_forward_zeros():
     verify_model(Zeros1().float().eval(), input_data=[])
 
 
+@tvm.testing.uses_gpu
 def test_forward_zeros_like():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -2222,6 +2328,7 @@ def test_forward_zeros_like():
     verify_model(ZerosLike3().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_full():
     torch.set_grad_enabled(False)
 
@@ -2237,6 +2344,7 @@ def test_forward_full():
     verify_model(Full2().float().eval(), input_data=[])
 
 
+@tvm.testing.uses_gpu
 def test_forward_full_like():
     torch.set_grad_enabled(False)
     input_shape = [1, 3, 10, 10]
@@ -2258,6 +2366,7 @@ def test_forward_full_like():
     verify_model(FullLike2().float().eval(), input_data=input_data)
     verify_model(FullLike3().float().eval(), input_data=input_data)
 
+@tvm.testing.uses_gpu
 def test_forward_linspace():
     torch.set_grad_enabled(False)
 
@@ -2296,6 +2405,7 @@ def test_forward_linspace():
     verify_model(Linspace8().float().eval())
 
 
+@tvm.testing.uses_gpu
 def test_forward_take():
     torch.set_grad_enabled(False)
     class Take1(Module):
@@ -2315,6 +2425,7 @@ def test_forward_take():
     verify_model(Take2().float().eval(), input_data=[input_data, indices])
 
 
+@tvm.testing.uses_gpu
 def test_forward_topk():
     torch.set_grad_enabled(False)
     class Topk1(Module):
@@ -2351,6 +2462,7 @@ def test_forward_topk():
     verify_model(Topk6().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_logical_not():
     torch.set_grad_enabled(False)
 
@@ -2371,6 +2483,7 @@ def test_forward_logical_not():
     verify_model(LogicalNot1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_bitwise_not():
     torch.set_grad_enabled(False)
 
@@ -2388,6 +2501,7 @@ def test_forward_bitwise_not():
     verify_model(BitwiseNot1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_bitwise_xor():
     torch.set_grad_enabled(False)
 
@@ -2414,6 +2528,7 @@ def test_forward_bitwise_xor():
     verify_model(BitwiseXor2().float().eval(), input_data=[lhs])
 
 
+@tvm.testing.uses_gpu
 def test_forward_logical_xor():
     torch.set_grad_enabled(False)
 
@@ -2440,6 +2555,7 @@ def test_forward_logical_xor():
     verify_model(LogicalXor2().float().eval(), input_data=[lhs])
 
 
+@tvm.testing.uses_gpu
 def test_forward_unary():
     torch.set_grad_enabled(False)
 
@@ -2562,6 +2678,7 @@ def test_forward_unary():
     verify_model(Neg1().float().eval(), input_data=input_data)
 
 
+@tvm.testing.uses_gpu
 def test_forward_where():
     torch.set_grad_enabled(False)
 
@@ -2582,6 +2699,7 @@ def test_forward_where():
     verify_model(Where2().float().eval(), input_data=[x, y])
 
 
+@tvm.testing.uses_gpu
 def test_forward_addcdiv():
     torch.set_grad_enabled(False)
 
@@ -2605,6 +2723,7 @@ def test_forward_addcdiv():
     verify_model(Addcdiv2().float().eval(), input_data=[input_data, t1, t2])
 
 
+@tvm.testing.uses_gpu
 def test_forward_addcmul():
     torch.set_grad_enabled(False)
 
@@ -2627,6 +2746,7 @@ def test_forward_addcmul():
     t2 = torch.rand([1, 3]).float()
     verify_model(Addcmul2().float().eval(), input_data=[input_data, t1, t2])
 
+@tvm.testing.uses_gpu
 def test_forward_traced_function():
     def fn(t1, t2):
         return t1 + t2
@@ -2635,6 +2755,7 @@ def test_forward_traced_function():
     tensor2 = torch.randn(3, 4)
     verify_model(fn, input_data=[tensor1, tensor2])
 
+@tvm.testing.uses_gpu
 def test_forward_dtypes():
     def fn(t1, t2):
         return 2.5 * t1 + t2
@@ -2658,12 +2779,14 @@ def test_forward_dtypes():
     verify_model(ModuleWithIntParameters(param), input_data=inp)
 
 
+@tvm.testing.uses_gpu
 def test_weight_names():
     tm = torch.jit.trace(torch.nn.Linear(3, 4), [torch.randn(2, 3)])
     mod, params = relay.frontend.from_pytorch(tm, [('input', (2, 3))])
     assert set(params.keys()) == set(n for n, p in tm.named_parameters())
 
 
+@tvm.testing.uses_gpu
 def test_duplicate_weight_use():
     # The test case doesn't make any sense as a neural network,
     # the issue popped up in shared input/output embeddings of bert,
@@ -2681,6 +2804,7 @@ def test_duplicate_weight_use():
     verify_model(Test(), input_data=[torch.randn(5, 5)])
 
 
+@tvm.testing.uses_gpu
 def test_forward_matmul():
     torch.set_grad_enabled(False)
 
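The PyTorch frontend tests above change only by gaining `@tvm.testing.uses_gpu`, marking them as safe to schedule on GPU test nodes without requiring a GPU. A minimal sketch of the marker on a hypothetical test (`test_marked_example` is not part of this patch):

    import tvm
    import tvm.testing

    @tvm.testing.uses_gpu              # may run on GPU CI nodes; a GPU is not required
    def test_marked_example():
        # The body still chooses its own targets, e.g. via
        # tvm.testing.enabled_targets() or tvm.testing.device_enabled().
        assert True
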
index e80d774..010899c 100644 (file)
@@ -45,7 +45,7 @@ def verify_fused_batch_norm(shape):
 
     for device in ["llvm"]:
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             continue
         mod, params = relay.frontend.from_tensorflow(constant_graph,
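This file, like most below, replaces the old `ctx.exist` probe with `tvm.testing.device_enabled`. A minimal sketch of the guard pattern; the device list is only an example and the build/run step is omitted:

    import tvm
    import tvm.testing

    for device in ["llvm", "cuda"]:
        # device_enabled() reflects the TVM_TEST_DEVICES selection, so test nodes
        # without the cuda device simply skip that leg of the loop.
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            continue
        ctx = tvm.context(device, 0)
        # ... build the module for `device` and run it on `ctx` ...
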
index 799d9c2..37a32be 100644 (file)
@@ -49,6 +49,8 @@ import tvm.relay.testing.tf as tf_testing
 from tvm.runtime.vm import VirtualMachine
 from packaging import version as package_version
 
+import tvm.testing
+
 #######################################################################
 # Generic run functions for TVM & tensorflow
 # ------------------------------------------
@@ -198,7 +200,7 @@ def compare_tf_with_tvm(in_data, in_name, out_name, init_global_variables=False,
 
         for device in ["llvm", "cuda"]:
             ctx = tvm.context(device, 0)
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("Skip because %s is not enabled" % device)
                 continue
             if no_gpu and device == 'cuda':
@@ -262,6 +264,7 @@ def _test_pooling(input_shape, **kwargs):
             _test_pooling_iteration(input_shape, **kwargs)
 
 
+@tvm.testing.uses_gpu
 def test_forward_pooling():
     """ Pooling """
     # TensorFlow only supports NDHWC for max_pool3d on CPU
@@ -408,6 +411,7 @@ def _test_convolution(opname, tensor_in_sizes, filter_in_sizes,
                                 'Placeholder:0', 'DepthwiseConv2dNative:0')
 
 
+@tvm.testing.uses_gpu
 def test_forward_convolution():
     if is_gpu_available():
         _test_convolution('conv', [4, 176, 8, 8], [1, 1, 176, 32], [1, 1], [1, 1], 'SAME', 'NCHW')
@@ -526,6 +530,7 @@ def _test_convolution3d(opname, tensor_in_sizes, filter_in_sizes,
             compare_tf_with_tvm(np.reshape(data_array, tensor_in_sizes).astype('float32'),
                                 'Placeholder:0', 'Conv3D:0', cuda_layout="NCDHW")
 
+@tvm.testing.uses_gpu
 def test_forward_convolution3d():
     if is_gpu_available():
         _test_convolution3d('conv', [4, 176, 8, 8, 8], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], 'SAME', 'NCDHW')
@@ -569,6 +574,7 @@ def _test_convolution3d_transpose(data_shape, filter_shape, strides,
         compare_tf_with_tvm(data_array, 'Placeholder:0', 'conv3d_transpose:0', cuda_layout="NDHWC")
 
 
+@tvm.testing.uses_gpu
 def test_forward_convolution3d_transpose():
     if is_gpu_available():
         _test_convolution3d_transpose(data_shape=[1, 10, 8, 8, 8],
@@ -655,6 +661,7 @@ def _test_biasadd(tensor_in_sizes, data_format):
                             'Placeholder:0', 'BiasAdd:0')
 
 
+@tvm.testing.uses_gpu
 def test_forward_biasadd():
     if is_gpu_available():
         _test_biasadd([4, 176, 8, 8], 'NCHW')
@@ -1230,7 +1237,8 @@ def test_forward_variable():
     _test_variable(np.random.uniform(size=(32, 100)).astype('float32'))
 
 
-def test_read_variable_op():
+@tvm.testing.parametrize_targets("llvm", "cuda")
+def test_read_variable_op(target, ctx):
     """ Read Variable op test """
 
     tf.reset_default_graph()
@@ -1270,18 +1278,12 @@ def test_read_variable_op():
             out_node,
         )
 
-        for device in ["llvm", "cuda"]:
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                continue
-
-            tvm_output = run_tvm_graph(final_graph_def, in_data, in_node,
-                                       target=device, out_names=out_name,
-                                       num_output=len(out_name))
-            for i in range(len(tf_output)):
-                tvm.testing.assert_allclose(
-                    tf_output[i], tvm_output[i], atol=1e-4, rtol=1e-5)
+        tvm_output = run_tvm_graph(final_graph_def, in_data, in_node,
+                                   target=target, out_names=out_name,
+                                   num_output=len(out_name))
+        for i in range(len(tf_output)):
+            tvm.testing.assert_allclose(
+                tf_output[i], tvm_output[i], atol=1e-4, rtol=1e-5)
 
         sess.close()
 
@@ -2382,6 +2384,7 @@ def test_forward_mobilenet():
 # --------
 
 
+@tvm.testing.requires_gpu
 def test_forward_resnetv2():
     '''test resnet model'''
     if is_gpu_available():
@@ -2399,7 +2402,7 @@ def test_forward_resnetv2():
                     sess, data, 'input_tensor:0', out_node + ':0')
                 for device in ["llvm", "cuda"]:
                     ctx = tvm.context(device, 0)
-                    if not ctx.exist:
+                    if not tvm.testing.device_enabled(device):
                         print("Skip because %s is not enabled" % device)
                         continue
                     tvm_output = run_tvm_graph(graph_def, data, 'input_tensor', len(tf_output),
@@ -2431,7 +2434,7 @@ def _test_ssd_impl():
             # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
             for device in ["llvm"]:
                 ctx = tvm.context(device, 0)
-                if not ctx.exist:
+                if not tvm.testing.device_enabled(device):
                     print("Skip because %s is not enabled" % device)
                     continue
                 tvm_output = run_tvm_graph(graph_def, data, in_node, len(out_node),
@@ -3754,7 +3757,7 @@ def test_forward_dynamic_input_shape():
             # TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
             for device in ["llvm"]:
                 ctx = tvm.context(device, 0)
-                if not ctx.exist:
+                if not tvm.testing.device_enabled(device):
                     print("Skip because %s is not enabled" % device)
                     continue
                 tvm_output = run_tvm_graph(graph_def, np_data, ["data"], 1,
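`test_read_variable_op` above trades its hand-written device loop for `@tvm.testing.parametrize_targets`, which supplies each test invocation with a `(target, ctx)` pair. A sketch of the resulting test shape, with a hypothetical body (`test_parametrized_example` is not part of this patch):

    import numpy as np
    import tvm
    import tvm.testing

    @tvm.testing.parametrize_targets("llvm", "cuda")
    def test_parametrized_example(target, ctx):
        # pytest generates one case per listed target; the decorator provides the
        # matching context, so no manual tvm.context()/device_enabled() loop is needed.
        data = np.random.rand(2, 2).astype("float32")
        arr = tvm.nd.array(data, ctx)
        tvm.testing.assert_allclose(arr.asnumpy(), data)
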
index 5496dfe..3e95000 100644 (file)
@@ -278,7 +278,7 @@ def compare_tflite_with_tvm(in_data, in_name, input_tensors,
 
         for device in ["llvm"]:
             ctx = tvm.context(device, 0)
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("Skip because %s is not enabled" % device)
                 continue
 
index 27f3788..0bfe61a 100644 (file)
 # specific language governing permissions and limitations
 # under the License.
 import tvm
+import tvm.testing
 from tvm import te
 import numpy as np
 
 
+@tvm.testing.requires_llvm
 def test_dot():
     nn = 12
     n = tvm.runtime.convert(nn)
@@ -29,9 +31,6 @@ def test_dot():
     s = te.create_schedule(C.op)
 
     def verify(target):
-        if not tvm.runtime.enabled(target):
-            print("Target %s is not enabled" % target)
-            return
         f = tvm.driver.build(s, [A, B, C], target)
         # verify
         ctx = tvm.cpu(0)
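`test_dot` now states its backend requirement up front with `@tvm.testing.requires_llvm` rather than probing `tvm.runtime.enabled("llvm")` inside `verify`. A minimal sketch of a test written in that style (the copy kernel below is a hypothetical example):

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import te

    @tvm.testing.requires_llvm
    def test_llvm_copy_example():
        n = 16
        A = te.placeholder((n,), name="A")
        B = te.compute((n,), lambda i: A[i], name="B")
        s = te.create_schedule(B.op)
        f = tvm.build(s, [A, B], "llvm")   # the decorator skips this test when llvm is unavailable
        ctx = tvm.cpu(0)
        a = tvm.nd.array(np.arange(n, dtype="float32"), ctx)
        b = tvm.nd.array(np.zeros(n, dtype="float32"), ctx)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy())
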
index dfa247e..d2fb503 100644 (file)
@@ -19,7 +19,9 @@ from tvm import te
 from tvm.contrib import nvcc
 import numpy as np
 import time
+import tvm.testing
 
+@tvm.testing.requires_gpu
 def test_exp():
     # graph
     n = tvm.runtime.convert(1024)
@@ -34,11 +36,9 @@ def test_exp():
 
     # one line to build the function.
     def check_device(device, host="stackvm"):
-        if not tvm.runtime.enabled(host):
+        if not tvm.testing.device_enabled(host):
             return
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            return
         fexp = tvm.build(s, [A, B],
                          device, host,
                          name="myexp")
@@ -55,6 +55,7 @@ def test_exp():
     check_device("cuda", "llvm")
     check_device("vulkan")
 
+@tvm.testing.requires_gpu
 def test_fmod():
     # graph
     def run(dtype):
@@ -69,7 +70,7 @@ def test_fmod():
 
         def check_device(device):
             ctx = tvm.context(device, 0)
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled.." % device)
                 return
             target = tvm.target.create(device)
@@ -102,6 +103,7 @@ def test_fmod():
 
     run("float32")
 
+@tvm.testing.requires_gpu
 def test_multiple_cache_write():
     # graph
     n = tvm.runtime.convert(1024)
@@ -123,10 +125,10 @@ def test_multiple_cache_write():
     s[C].bind(tx, te.thread_axis("threadIdx.x"))
     # one line to build the function.
     def check_device(device, host="stackvm"):
-        if not tvm.runtime.enabled(host):
+        if not tvm.testing.device_enabled(host):
             return
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             return
         func = tvm.build(s, [A0, A1, C],
                          device, host,
@@ -155,7 +157,7 @@ def test_log_pow_llvm():
     # create iter var and assign them tags.
     bx, tx = s[B].split(B.op.axis[0], factor=32)
     # one line to build the function.
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
 
     flog = tvm.build(s, [A, B],
@@ -173,6 +175,7 @@ def test_log_pow_llvm():
         b.asnumpy(), np.power(np.log(a.asnumpy()), 2.0), rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_popcount():
     def run(dtype):
         # graph
@@ -186,7 +189,7 @@ def test_popcount():
 
         def check_device(device):
             ctx = tvm.context(device, 0)
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled.." % device)
                 return
             target = tvm.target.create(device)
@@ -212,6 +215,7 @@ def test_popcount():
     run('uint64')
 
 
+@tvm.testing.requires_gpu
 def test_add():
     def run(dtype):
         # graph
@@ -235,7 +239,7 @@ def test_add():
         # one line to build the function.
         def check_device(device):
             ctx = tvm.context(device, 0)
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled.." % device)
                 return
             fadd = tvm.build(s, [A, B, C],
@@ -264,6 +268,7 @@ def test_add():
     run("uint64")
 
 
+@tvm.testing.requires_gpu
 def try_warp_memory():
     """skip this in default test because it require higher arch"""
     m = 128
@@ -289,7 +294,7 @@ def try_warp_memory():
     # one line to build the function.
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
         f = tvm.build(s, [A, B], device)
index 7883a4c..abcddc4 100644 (file)
@@ -40,11 +40,9 @@ def test_exp():
 
     # one line to build the function.
     def check_device(device, host="llvm"):
-        if not tvm.runtime.enabled(host):
+        if not tvm.testing.device_enabled(device):
             return
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            return
         fexp = tvm.build(s, [A, B],
                          device, host,
                          name="myexp")
@@ -79,11 +77,9 @@ def test_multi_kernel():
 
     # one line to build the function.
     def check_device(device, host="llvm"):
-        if not tvm.runtime.enabled(host):
+        if not tvm.testing.device_enabled(device):
             return
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            return
         fadd = tvm.build(s, [A, B, C, D],
                          device, host,
                          name="myadd")
index 12026da..1b7d54e 100644 (file)
@@ -18,8 +18,10 @@ import tvm
 from tvm import te
 import numpy as np
 import time
+import tvm.testing
 
 
+@tvm.testing.requires_gpu
 def test_gemm():
     # graph
     nn = 1024
@@ -82,7 +84,7 @@ def test_gemm():
     # one line to build the function.
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
 
index 67f6dcf..35980ed 100644 (file)
 import tvm
 from tvm import te
 import numpy as np
+import tvm.testing
 
 
+@tvm.testing.requires_gpu
 def test_reduce_prims():
     def test_prim(reducer, np_reducer):
         # graph
@@ -40,9 +42,7 @@ def test_reduce_prims():
         # one line to build the function.
         def check_device(device, host="llvm"):
             ctx = tvm.context(device, 0)
-            if not tvm.runtime.enabled(host):
-                return
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled.." % device)
                 return
             freduce = tvm.build(s,
@@ -140,7 +140,7 @@ def test_rfactor():
     s[BF].parallel(BF.op.axis[0])
     # one line to build the function.
     def check_target(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             return
         ctx = tvm.cpu(0)
         fapi = tvm.lower(s, args=[A, B])
@@ -204,7 +204,7 @@ def test_rfactor_factor_axis():
     s[BF].parallel(BF.op.axis[0])
     # one line to build the function.
     def check_target(target="llvm"):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             return
         ctx = tvm.cpu(0)
         fapi = tvm.lower(s, args=[A, B])
@@ -223,6 +223,7 @@ def test_rfactor_factor_axis():
     check_target()
 
 
+@tvm.testing.requires_gpu
 def test_rfactor_threads():
     nn = 1027
     mm = 10
@@ -248,7 +249,7 @@ def test_rfactor_threads():
     # one line to build the function.
     def check_target(device, host="stackvm"):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
 
@@ -273,6 +274,7 @@ def test_rfactor_threads():
     check_target("opencl")
     check_target("rocm")
 
+@tvm.testing.requires_gpu
 def test_rfactor_elemwise_threads():
     n = 1025
     m = 10
@@ -303,7 +305,7 @@ def test_rfactor_elemwise_threads():
     # one line to build the function.
     def check_target(device, host="stackvm"):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
         fapi = tvm.lower(s, args=[A, C])
@@ -346,7 +348,7 @@ def test_argmax():
 
     def check_target():
         device = 'cpu'
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
         ctx = tvm.context(device, 0)
@@ -371,6 +373,7 @@ def test_argmax():
     check_target()
 
 
+@tvm.testing.requires_gpu
 def test_rfactor_argmax():
     def fcombine(x, y):
         lhs = tvm.tir.Select((x[1] >= y[1]), x[0], y[0])
@@ -409,7 +412,7 @@ def test_rfactor_argmax():
 
     def check_target(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
         fapi = tvm.lower(s, args=[A0, A1, B0, B1])
@@ -432,6 +435,7 @@ def test_rfactor_argmax():
     check_target("vulkan")
     check_target("rocm")
 
+@tvm.testing.requires_gpu
 def test_warp_reduction1():
     nthx = 32
     nthy = 4
@@ -441,7 +445,7 @@ def test_warp_reduction1():
 
     def check_target(device, m, n):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
 
@@ -478,6 +482,7 @@ def test_warp_reduction1():
     # This is a bug in normal reduction.
     # check_target("cuda", m=10, n=37)
 
+@tvm.testing.requires_gpu
 def test_warp_reduction2():
     def fcombine(x, y):
         return x[0] + y[0], x[1] * y[1]
@@ -503,7 +508,7 @@ def test_warp_reduction2():
 
     def check_target(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
 
index 99553c3..9a61e60 100644 (file)
@@ -17,7 +17,9 @@
 import tvm
 from tvm import te
 import numpy as np
+import tvm.testing
 
+@tvm.testing.requires_gpu
 def test_scan():
     m = te.size_var("m")
     n = te.size_var("n")
@@ -47,7 +49,7 @@ def test_scan():
     # one line to build the function.
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled.." % device)
             return
         fscan = tvm.build(s, [X, res],
index 95b94f6..5f45119 100644 (file)
@@ -26,6 +26,8 @@ from tvm import te
 from tvm import autotvm
 from tvm.autotvm.tuner import RandomTuner
 
+import tvm.testing
+
 @autotvm.template("testing/conv2d_no_batching")
 def conv2d_no_batching(N, H, W, CI, CO, KH, KW):
     """An example template for testing"""
@@ -120,26 +122,18 @@ def get_sample_task(target=tvm.target.cuda(), target_host=None):
                                target=target, target_host=target_host)
     return task, target
 
-def test_tuning():
-    def check(target, target_host):
-        ctx = tvm.context(target, 0)
-        if not ctx.exist:
-            logging.info("Skip test because %s is not available" % target)
-            return
-
-        # init task
-        task, target = get_sample_task(target, target_host)
-        logging.info("%s", task.config_space)
-
-        measure_option = autotvm.measure_option(
-            autotvm.LocalBuilder(),
-            autotvm.LocalRunner())
+@tvm.testing.parametrize_targets("cuda", "opencl")
+def test_tuning(target, ctx):
+    # init task
+    task, target = get_sample_task(target, None)
+    logging.info("%s", task.config_space)
 
-        tuner = RandomTuner(task)
-        tuner.tune(n_trial=20, measure_option=measure_option)
+    measure_option = autotvm.measure_option(
+        autotvm.LocalBuilder(),
+        autotvm.LocalRunner())
 
-    check("cuda", None)
-    check("opencl", None)
+    tuner = RandomTuner(task)
+    tuner.tune(n_trial=20, measure_option=measure_option)
 
 if __name__ == "__main__":
     # only print log when invoked from main
index 994a047..e6841dd 100644 (file)
@@ -25,6 +25,7 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
 from pytest import skip
+import tvm.testing
 
 
 def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, add_bias=False, add_relu=False,
@@ -60,8 +61,8 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            skip("s is not enabled" % device)
+        if not tvm.testing.device_enabled(device):
+            print("Skipping %s becuase it is not enabled" % device)
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             C = topi.nn.conv2d(A, W, stride, padding, dilation, layout='NCHW', out_dtype=dtype)
index d4b55f1..ada9a96 100644 (file)
@@ -23,6 +23,7 @@ import mxnet as mx
 from mxnet import gluon
 import logging
 import os
+import tvm.testing
 
 logging.basicConfig(level=logging.INFO)
 
@@ -112,6 +113,7 @@ def eval_acc(model, dataset, batch_fn, target=tvm.target.cuda(), ctx=tvm.gpu(),
     logging.info('[final] validation: acc-top1=%f acc-top5=%f', top1, top5)
     return top1
 
+@tvm.testing.requires_gpu
 def test_quantize_acc(cfg, rec_val):
     qconfig = qtz.qconfig(skip_conv_layers=[0],
                           nbit_input=cfg.nbit_input,
index 95a030f..0097a4e 100644 (file)
@@ -22,11 +22,13 @@ Support level10 operator test cases.
 import numpy as np
 import tvm
 from tvm import relay
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 import tvm.topi.testing
 import random
+import tvm.testing
 
 
+@tvm.testing.uses_gpu
 def test_dyn_broadcast_to():
     dtype = 'uint8'
     rank = 3
@@ -44,7 +46,7 @@ def test_dyn_broadcast_to():
     x = np.random.uniform(size=x_shape).astype(dtype)
     dyn_shape = (1, ) * rank
     ref_res = np.broadcast_to(x, dyn_shape)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         if (target != 'cuda'):  #skip cuda because we don't have dynamic support for GPU
             for kind in ["vm", "debug"]:
                 mod = tvm.ir.IRModule.from_expr(func)
@@ -53,6 +55,7 @@ def test_dyn_broadcast_to():
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_dyn_one_hot():
     def _get_oshape(indices_shape, depth, axis):
         oshape = []
@@ -77,7 +80,7 @@ def test_dyn_one_hot():
         func = relay.Function([indices, depth_var], out)
         indices_np = np.random.randint(0, depth, size=indices_shape).astype("int32")
         out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if (target != 'cuda'):  #skip cuda because we don't have dynamic support for GPU
                 for kind in ["vm", "debug"]:
                     mod = tvm.ir.IRModule.from_expr(func)
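In the Relay suites, `ctx_list()` gives way to `tvm.testing.enabled_targets()`, which only yields `(target, ctx)` pairs for devices selected for testing. A minimal sketch of the resulting loop over both executor kinds, using a hypothetical single-operator test:

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    @tvm.testing.uses_gpu
    def test_exp_example():                       # hypothetical, not part of this patch
        x = relay.var("x", shape=(8,), dtype="float32")
        func = relay.Function([x], relay.exp(x))
        data = np.random.rand(8).astype("float32")
        for target, ctx in tvm.testing.enabled_targets():
            for kind in ["graph", "debug"]:
                intrp = relay.create_executor(kind, ctx=ctx, target=target)
                out = intrp.evaluate(func)(data)
                tvm.testing.assert_allclose(out.asnumpy(), np.exp(data), rtol=1e-5)
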
index e1a0d28..bab4869 100644 (file)
@@ -21,7 +21,7 @@ import numpy as np
 import tvm
 from tvm import relay
 from tvm import te
-from tvm.relay.testing import ctx_list
+from tvm.relay.testing import enabled_targets
 import random
 from test_dynamic_op_level3 import verify_func
 import tvm.topi.testing
@@ -51,7 +51,7 @@ def test_dyn_upsampling_run():
         zz = run_infer_type(z)
         func = relay.Function([x, scale_h_var, scale_w_var], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
              if "llvm" not in target: continue
              for kind in ["vm", "debug"]:
                  mod = tvm.ir.IRModule.from_expr(func)
index 91e9cc7..193de85 100644 (file)
@@ -22,11 +22,12 @@ import tvm
 from tvm import te
 from tvm import relay
 from tvm.relay import create_executor, transform
-from tvm.relay.testing import ctx_list, check_grad, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
+import tvm.testing
 
 def verify_func(func, data, ref_res):
     assert isinstance(data, list)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         #TODO(mbrookhart): enable Cuda tests once the VM supports dynamic shapes
         if "llvm" not in target: continue
         for kind in ["vm", "debug"]:
@@ -36,6 +37,7 @@ def verify_func(func, data, ref_res):
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
             relay.backend.compile_engine.get().clear()
 
+@tvm.testing.uses_gpu
 def test_dyn_reshape():
     def verify_reshape(shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -60,6 +62,7 @@ def test_dyn_reshape():
     verify_reshape((2, 3, 4, 5), (-3, -3), (6, 20))
     verify_reshape((2, 3, 4), (0, -3), (2, 12))
 
+@tvm.testing.uses_gpu
 def test_dyn_shape_reshape():
     def verify_reshape(shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -76,6 +79,7 @@ def test_dyn_shape_reshape():
     verify_reshape((2, 3, 4), (8, 3), (8, 3))
     verify_reshape((4, 7), (2, 7, 2), (2, 7, 2))
 
+@tvm.testing.uses_gpu
 def test_dyn_tile():
     def verify_tile(dshape, reps):
         x = relay.var("x", relay.TensorType(dshape, "float32"))
@@ -92,6 +96,7 @@ def test_dyn_tile():
     verify_tile((2, 3), (3, 2, 1))
 
 
+@tvm.testing.uses_gpu
 def test_dyn_zeros_ones():
     def verify_zeros_ones(shape, dtype):
         for op, ref in [(relay.zeros, np.zeros), (relay.ones, np.ones)]:
@@ -107,6 +112,7 @@ def test_dyn_zeros_ones():
     verify_zeros_ones((1, 3), 'int64')
     verify_zeros_ones((8, 9, 1, 2), 'float32')
 
+@tvm.testing.uses_gpu
 def test_dyn_full():
     def verify_full(fill_value, src_shape, dtype):
         x = relay.var("x", relay.scalar_type(dtype))
index 8dcfd1f..226bbfe 100644 (file)
@@ -22,8 +22,9 @@ import tvm
 from tvm import te
 from tvm import relay
 from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 import tvm.topi.testing
+import tvm.testing
 
 
 def test_resize_infer_type():
@@ -35,6 +36,7 @@ def test_resize_infer_type():
     assert zz.checked_type == relay.TensorType((n, c, relay.Any(), relay.Any()), "int8")
 
 
+@tvm.testing.uses_gpu
 def test_resize():
     def verify_resize(dshape, scale, method, layout):
         if layout == "NHWC":
@@ -57,7 +59,7 @@ def test_resize():
         zz = run_infer_type(z)
         func = relay.Function([x, size_var], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if "llvm" not in target: continue
             for kind in ["vm", "debug"]:
                 mod = tvm.ir.IRModule.from_expr(func)
index ddfab55..6dcde95 100644 (file)
@@ -21,8 +21,9 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import relay
-from tvm.relay.testing import ctx_list
+import tvm.testing
 
+@tvm.testing.uses_gpu
 def test_dynamic_topk():
     def verify_topk(k, axis, ret_type, is_ascend, dtype):
         shape = (20, 100)
@@ -51,7 +52,7 @@ def test_dynamic_topk():
                 np_values[i, :] = np_data[i, np_indices[i, :]]
         np_indices = np_indices.astype(dtype)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if "llvm" not in target: continue
             for kind in ["vm", "debug"]:
                 mod = tvm.ir.IRModule.from_expr(func)
index 6bc170d..906882d 100644 (file)
@@ -23,6 +23,7 @@ from tvm import autotvm
 from tvm import topi
 from tvm.relay.testing import run_infer_type
 from tvm.relay.testing.temp_op_attr import TempOpAttr
+import tvm.testing
 
 
 @autotvm.register_topi_compute("test/conv2d_1")
@@ -161,14 +162,14 @@ def test_compile_engine():
     z3 = engine.lower(get_func(()), "llvm")
     assert z1.same_as(z2)
     assert not z3.same_as(z1)
-    if tvm.context("cuda").exist:
+    if tvm.testing.device_enabled("cuda"):
         z4 = engine.lower(get_func(()), "cuda")
         assert not z3.same_as(z4)
 
     # Test JIT target
     for target in ["llvm"]:
         ctx = tvm.context(target)
-        if ctx.exist:
+        if tvm.testing.device_enabled(target):
             f = engine.jit(get_func((10,)), target)
             x = tvm.nd.array(np.ones(10).astype("float32"), ctx=ctx)
             y = tvm.nd.empty((10,), ctx=ctx)
index f0785bc..70a6fb1 100644 (file)
@@ -20,7 +20,7 @@ import tvm
 from tvm import relay
 from tvm.contrib import graph_runtime
 from tvm.relay.op import add
-from tvm.relay.testing.config import ctx_list
+import tvm.testing
 
 # @tq, @jr should we put this in testing ns?
 def check_rts(expr, args, expected_result, mod=None):
@@ -141,6 +141,7 @@ def test_plan_memory():
     assert len(device_types) == 1
 
 
+@tvm.testing.uses_gpu
 def test_gru_like():
     def unit(rnn_dim):
         X = relay.var("X", shape=(1, rnn_dim))
@@ -165,7 +166,7 @@ def test_gru_like():
     out_shape = (1, rnn_dim)
     z = unit(rnn_dim)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         with tvm.transform.PassContext(opt_level=2):
             graph, lib, params = relay.build(tvm.IRModule.from_expr(z), target)
             m = graph_runtime.create(graph, lib, ctx)
index 360b6bd..41a07e4 100644 (file)
@@ -30,7 +30,7 @@ def check_eval(expr, args, expected_result, mod=None, rtol=1e-07):
     # TODO(tqchen) add more types once the schedule register is fixed.
     for target in ["llvm"]:
         ctx = tvm.context(target, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(target):
             return
         intrp = create_executor(mod=mod, ctx=ctx, target=target)
         result = intrp.evaluate(expr)(*args)
index fa56eb0..faf6867 100644 (file)
@@ -20,6 +20,7 @@ import tvm
 from tvm import te
 from tvm import relay
 from tvm.contrib.nvcc import have_fp16
+import tvm.testing
 
 
 def test_basic_build():
@@ -64,13 +65,10 @@ def test_basic_build():
                                atol=1e-5, rtol=1e-5)
 
 
+@tvm.testing.requires_cuda
 def test_fp16_build():
     dtype = "float16"
 
-    if not tvm.runtime.enabled("cuda") or not tvm.gpu(0).exist:
-        print("skip because cuda is not enabled.")
-        return
-
     ctx = tvm.gpu(0)
     if dtype == "float16" and not have_fp16(ctx.compute_version):
         print("skip because gpu does not support fp16")
@@ -100,40 +98,34 @@ def test_fp16_build():
                                atol=1e-5, rtol=1e-5)
 
 
-def test_fp16_conversion():
-    def check_conversion(tgt, ctx):
-        if not tvm.runtime.enabled(tgt):
-            print("skip because {} is not enabled.".format(tgt))
-            return
-        elif tgt == "cuda" and ctx.exist and not have_fp16(ctx.compute_version):
-            print("skip because gpu does not support fp16")
-            return
-
-        n = 10
+@tvm.testing.parametrize_targets("llvm", "cuda")
+def test_fp16_conversion(target, ctx):
+    if target == "cuda" and not have_fp16(ctx.compute_version):
+        print("skip because gpu does not support fp16")
+        return
 
-        for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]:
-            x = relay.var("x", relay.TensorType((n,), src))
-            y = x.astype(dst)
-            func = relay.Function([x], y)
+    n = 10
 
-            # init input
-            X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)
+    for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]:
+        x = relay.var("x", relay.TensorType((n,), src))
+        y = x.astype(dst)
+        func = relay.Function([x], y)
 
-            # build
-            with tvm.transform.PassContext(opt_level=1):
-                g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), tgt)
+        # init input
+        X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)
 
-            # test
-            rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
-            rt.set_input("x", X)
-            rt.run()
-            out = rt.get_output(0)
+        # build
+        with tvm.transform.PassContext(opt_level=1):
+            g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), target)
 
-            np.testing.assert_allclose(out.asnumpy(), X.asnumpy().astype(dst),
-                                       atol=1e-5, rtol=1e-5)
+        # test
+        rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
+        rt.set_input("x", X)
+        rt.run()
+        out = rt.get_output(0)
 
-    for target, ctx in [('llvm', tvm.cpu()), ('cuda', tvm.gpu())]:
-        check_conversion(target, ctx)
+        np.testing.assert_allclose(out.asnumpy(), X.asnumpy().astype(dst),
+                                   atol=1e-5, rtol=1e-5)
 
 
 if __name__ == "__main__":
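`test_fp16_build` above drops its manual CUDA check in favour of `@tvm.testing.requires_cuda`, while `test_fp16_conversion` becomes target-parametrized. A short sketch contrasting the hard-requirement decorators with `uses_gpu`; both test bodies are hypothetical, and the exact skip behaviour is assumed from the decorator names:

    import tvm
    import tvm.testing

    @tvm.testing.requires_cuda           # assumed to skip the test when CUDA is unavailable
    def test_cuda_only_example():
        ctx = tvm.gpu(0)
        assert ctx.exist                 # reached only on nodes where CUDA is usable

    @tvm.testing.uses_gpu                # may run on GPU nodes, but must guard itself
    def test_gpu_optional_example():
        for target, ctx in tvm.testing.enabled_targets():
            pass                         # per-target checks would go here
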
index 437901e..3847e18 100644 (file)
@@ -20,8 +20,9 @@ import pytest
 import tvm
 from tvm import te
 from tvm import relay
-from tvm.relay.testing import check_grad, ctx_list, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
 from tvm.relay.transform import gradient
+import tvm.testing
 
 
 def sigmoid(x):
@@ -35,6 +36,7 @@ def relu(x):
     return x_copy
 
 
+@tvm.testing.uses_gpu
 def test_unary_op():
     def check_single_op(opfunc, ref, dtype):
         shape = (10, 4)
@@ -49,7 +51,7 @@ def test_unary_op():
             fwd_func = run_infer_type(fwd_func)
             bwd_func = run_infer_type(gradient(fwd_func))
 
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 intrp = relay.create_executor(ctx=ctx, target=target)
                 op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
                 np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
@@ -79,6 +81,7 @@ def test_unary_op():
             check_single_op(opfunc, ref, dtype)
 
 
+@tvm.testing.uses_gpu
 def test_binary_op():
     def inst(vars, sh):
         return [vars.get(s, s) for s in sh]
@@ -97,7 +100,7 @@ def test_binary_op():
         fwd_func = run_infer_type(fwd_func)
         bwd_func = run_infer_type(gradient(fwd_func))
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp = relay.create_executor(ctx=ctx, target=target)
             op_res, (op_grad0, op_grad1) = intrp.evaluate(bwd_func)(x_data, y_data)
             np.testing.assert_allclose(op_grad0.asnumpy(), ref_grad0, rtol=0.01)
index 50e3585..396e43d 100644 (file)
@@ -21,8 +21,9 @@ import tvm.topi.testing
 import tvm
 from tvm import te
 from tvm import relay
-from tvm.relay.testing import check_grad, ctx_list, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
 from tvm.relay.transform import gradient
+import tvm.testing
 
 
 def verify_max_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode):
@@ -43,12 +44,13 @@ def verify_max_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode):
         padding=[ph, pw, ph, pw],
         pool_type='max', ceil_mode=ceil_mode)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp = relay.create_executor(ctx=ctx, target=target)
         op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
         np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
 
 
+@tvm.testing.uses_gpu
 def test_max_pool2d_grad():
     verify_max_pool2d_grad((1, 4, 16, 16), pool_size=(2, 2), strides=(2, 2), padding=(0, 0), ceil_mode=False)
     verify_max_pool2d_grad((1, 4, 16, 16), pool_size=(1, 1), strides=(1, 1), padding=(1, 1), ceil_mode=False)
@@ -72,11 +74,12 @@ def verify_avg_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode, coun
         padding=[ph, pw, ph, pw],
         pool_type='avg', ceil_mode=ceil_mode)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp = relay.create_executor(ctx=ctx, target=target)
         op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
         np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
 
+@tvm.testing.uses_gpu
 def test_avg_pool2d_grad():
     verify_avg_pool2d_grad((1, 4, 16, 16), pool_size=(2, 2), strides=(2, 2), padding=(0, 0),
                            ceil_mode=False, count_include_pad=True)
@@ -100,11 +103,12 @@ def verify_global_avg_pool2d_grad(x_shape):
         strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg',
         ceil_mode=False)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp = relay.create_executor(ctx=ctx, target=target)
         op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
         np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
 
+@tvm.testing.uses_gpu
 def test_global_avg_pool2d_grad():
     verify_global_avg_pool2d_grad((1, 4, 16, 16))
     verify_global_avg_pool2d_grad((1, 8, 8, 24))
@@ -139,7 +143,7 @@ def verify_conv2d_grad(dshape, wshape, strides, padding, dilation, groups=1, mod
                            .detach().numpy()
 
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         data = tvm.nd.array(data_pt.detach().numpy(), ctx)
         weight = tvm.nd.array(weight_pt.detach().numpy(), ctx)
         intrp = relay.create_executor(ctx=ctx, target=target)
@@ -148,6 +152,7 @@ def verify_conv2d_grad(dshape, wshape, strides, padding, dilation, groups=1, mod
         np.testing.assert_allclose(grad_weight.asnumpy(), grad_weight_pt, rtol=1e-4, atol=1e-4)
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_grad():
     verify_conv2d_grad((1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1])
     verify_conv2d_grad((1, 4, 16, 16), (16, 4, 1, 1), [1, 1], [0, 0], [1, 1])
index 8ca1eae..a63ec6e 100644 (file)
@@ -20,10 +20,12 @@ import pytest
 import tvm
 from tvm import te
 from tvm import relay
-from tvm.relay.testing import check_grad, ctx_list, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
 from tvm.relay.transform import gradient
+import tvm.testing
 
 
+@tvm.testing.uses_gpu
 def test_clip():
     for dtype in ('float32', 'float64'):
         ref = (lambda x: np.where(x > 10.0, np.zeros_like(x),
@@ -37,7 +39,7 @@ def test_clip():
         fwd_func = run_infer_type(fwd_func)
         bwd_func = run_infer_type(gradient(fwd_func))
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp = relay.create_executor(ctx=ctx, target=target)
             op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
             np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
index 4616a14..086a880 100644 (file)
@@ -21,9 +21,10 @@ from tvm import te
 import scipy
 from tvm import relay
 from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 import tvm.topi.testing
 from tvm.contrib.nvcc import have_fp16
+import tvm.testing
 
 
 def sigmoid(x):
@@ -39,6 +40,7 @@ def rsqrt(x):
     one = np.ones_like(x)
     return one / np.sqrt(x)
 
+@tvm.testing.uses_gpu
 def test_unary_op():
     def check_single_op(opfunc, ref, dtype):
         shape = (10, 4)
@@ -56,7 +58,7 @@ def test_unary_op():
             data = np.random.rand(*shape).astype(dtype)
             ref_res = ref(data)
             func = relay.Function([x], y)
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 # use the graph executor by default for testing, as we need to
                 # create the function explicitly to avoid constant-folding.
                 if dtype ==  'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
@@ -82,6 +84,7 @@ def test_unary_op():
             check_single_op(opfunc, ref, dtype)
 
 
+@tvm.testing.uses_gpu
 def test_binary_op():
     def inst(vars, sh):
         return [vars.get(s, s) for s in sh]
@@ -112,7 +115,7 @@ def test_binary_op():
             ref_res = ref(x_data, y_data)
             func = relay.Function([x, y], z)
 
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 # use the graph executor by default for testing, as we need to
                 # create the function explicitly to avoid constant-folding.
                 if dtype ==  'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
@@ -131,12 +134,13 @@ def test_binary_op():
             check_binary_op(opfunc, ref, dtype)
 
 
+@tvm.testing.uses_gpu
 def test_expand_dims():
     # based on topi test
     def verify_expand_dims(dshape, dtype, oshape, axis, num_newaxis):
         x = relay.Var("x", relay.TensorType(dshape, dtype))
         func = relay.Function([x], relay.expand_dims(x, axis, num_newaxis))
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if dtype ==  'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
                 continue
             data = np.random.uniform(size=dshape).astype(dtype)
@@ -149,6 +153,7 @@ def test_expand_dims():
         verify_expand_dims((3, 10), dtype, (1, 3, 10), -3, 1)
 
 
+@tvm.testing.uses_gpu
 def test_bias_add():
     for dtype in ['float16', 'float32']:
         xshape=(10, 2, 3, 4)
@@ -165,7 +170,7 @@ def test_bias_add():
         x_data = np.random.uniform(size=xshape).astype(dtype)
         y_data = np.random.uniform(size=bshape).astype(dtype)
         ref_res = x_data + y_data.reshape((2, 1, 1))
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if dtype ==  'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
                 continue
             intrp = relay.create_executor("graph", ctx=ctx, target=target)
@@ -183,6 +188,7 @@ def test_expand_dims_infer_type():
         assert yy.checked_type == relay.TensorType((n, t, 1, 100), dtype)
 
 
+@tvm.testing.uses_gpu
 def test_softmax():
     for dtype in ['float16', 'float32']:
         # Softmax accuracy for float16 is poor
@@ -197,12 +203,13 @@ def test_softmax():
         func = relay.Function([x], y)
         x_data = np.random.uniform(size=shape).astype(dtype)
         ref_res = tvm.topi.testing.softmax_python(x_data)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp = relay.create_executor("graph", ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x_data)
             np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_log_softmax():
     for dtype in ['float16', 'float32']:
         # Softmax accuracy for float16 is poor
@@ -217,12 +224,13 @@ def test_log_softmax():
         func = relay.Function([x], y)
         x_data = np.random.uniform(size=shape).astype(dtype)
         ref_res = tvm.topi.testing.log_softmax_python(x_data)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp = relay.create_executor("graph", ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x_data)
             np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_concatenate():
     for dtype in ['float16', 'float32']:
         n, t, d = te.size_var("n"), te.size_var("t"), 100
@@ -266,7 +274,7 @@ def test_concatenate():
         t_data = np.random.uniform(size=()).astype(dtype)
         ref_res = np.concatenate((x_data, y_data), axis=1) + t_data
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if dtype ==  'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
                 continue
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
@@ -345,6 +353,7 @@ def test_dense_type_check():
     y = relay.nn.dense(x, w)
     yy = run_infer_type(y)
 
+@tvm.testing.uses_gpu
 def test_dense():
     for dtype in ['float16', 'float32']:
         # Dense accuracy for float16 is poor
@@ -383,7 +392,7 @@ def test_dense():
         w_data = np.random.rand(2, 5).astype(dtype)
         ref_res = np.dot(x_data, w_data.T)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(x_data, w_data)
index a65b17f..3aaa76d 100644 (file)
@@ -22,11 +22,13 @@ from tvm import te
 import tvm.topi.testing
 from tvm import relay
 from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 from tvm import topi
 import tvm.topi.testing
+import tvm.testing
 
 
+@tvm.testing.uses_gpu
 def test_checkpoint():
     dtype = "float32"
     xs = [relay.var("x{}".format(i), dtype) for i in range(4)]
@@ -38,7 +40,7 @@ def test_checkpoint():
     assert f.checked_type == f_checkpoint.checked_type
 
     inputs = [np.random.uniform() for _ in range(len(xs))]
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             f_res = intrp.evaluate(f)(*inputs)
@@ -148,6 +150,7 @@ def test_checkpoint_alpha_equal_tuple():
 
     tvm.ir.assert_structural_equal(df, df_parsed)
 
+@tvm.testing.uses_gpu
 def test_collapse_sum_like():
     shape = (3, 4, 5, 6)
     shape_like = (4, 5, 6)
@@ -162,13 +165,14 @@ def test_collapse_sum_like():
     x = np.random.uniform(size=shape).astype(dtype)
     y = np.random.uniform(size=shape_like).astype(dtype)
     ref_res = np.sum(x, 0)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x, y)
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_collapse_sum_to():
     shape = (3, 4, 5, 6)
     shape_to = (4, 5, 6)
@@ -181,13 +185,14 @@ def test_collapse_sum_to():
     func = relay.Function([x], z)
     x = np.random.uniform(size=shape).astype(dtype)
     ref_res = np.sum(x, 0)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x)
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_broadcast_to():
     shape = (4, 1, 6)
     shape_like = (3, 4, 5, 6)
@@ -200,12 +205,13 @@ def test_broadcast_to():
     func = relay.Function([x], z)
     x = np.random.uniform(size=shape).astype(dtype)
     ref_res = np.broadcast_to(x, shape_like)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x)
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_broadcast_to_like():
     shape = (4, 1, 6)
     shape_like = (3, 4, 5, 6)
@@ -222,7 +228,7 @@ def test_broadcast_to_like():
     y = np.random.uniform(size=shape_like).astype(dtype)
     ref_res = np.broadcast_to(x, shape_like)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x, y)
@@ -266,12 +272,13 @@ def verify_slice_like(data, slice_like, axes, output, dtype="float32"):
     y_data = np.random.uniform(size=slice_like).astype(dtype)
     ref_res = np_slice_like(x_data, y_data, axes)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x_data, y_data)
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_slice_like():
     d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4")
     verify_slice_like(data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3))
@@ -286,6 +293,7 @@ def test_slice_like():
                       axes=(2, 3),
                       output=(1, 3, 112, 112))
 
+@tvm.testing.uses_gpu
 def test_reverse_reshape():
     def verify_reverse_reshape(shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -297,7 +305,7 @@ def test_reverse_reshape():
         func = relay.Function([x], z)
         x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
         ref_res = np.reshape(x_data, oshape)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -320,12 +328,13 @@ def verify_batch_matmul(x_shape, y_shape, out_shape, dtype="float32"):
     y_np = np.random.uniform(size=y_shape).astype(dtype)
     z_np = tvm.topi.testing.batch_matmul(x_np, y_np)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             z = intrp.evaluate(func)(x_np, y_np)
             tvm.testing.assert_allclose(z.asnumpy(), z_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_batch_matmul():
     b, m, n, k = te.size_var("b"), te.size_var("m"), te.size_var("n"), te.size_var("k")
     x = relay.var("x", relay.TensorType((b, m, k), "float32"))
@@ -339,13 +348,14 @@ def test_batch_matmul():
     verify_batch_matmul((5, 16, 32), (5, 20, 32), (5, 16, 20))
     verify_batch_matmul((30, 16, 32), (30, 20, 32), (30, 16, 20))
 
+@tvm.testing.uses_gpu
 def test_shape_of():
     shape = (10, 5, 12)
     x = relay.var("x", shape=shape)
     func = relay.Function([x], relay.op.shape_of(x))
     func = run_infer_type(func)
     x_data = np.random.rand(*shape).astype('float32')
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         # Because using graph executor, this op will be optimized after
         # constant folding pass, here we only test with interpreter
         for kind in ["debug"]:
@@ -354,6 +364,7 @@ def test_shape_of():
             tvm.testing.assert_allclose(op_res.asnumpy(),
                                         np.array(shape).astype('int32'))
 
+@tvm.testing.uses_gpu
 def test_ndarray_size():
     def verify_ndarray_size(shape):
         x = relay.var("x", shape=shape)
@@ -362,7 +373,7 @@ def test_ndarray_size():
 
         x_data = np.random.uniform(size=shape).astype("float32")
         ref_res = np.size(x_data)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -380,7 +391,7 @@ def verify_adaptive_pool(dshape, out_size, pool_type, layout, dtype, opfunc):
     np_data = np.random.uniform(low=0, high=255, size=dshape).astype(dtype)
     np_out = tvm.topi.testing.adaptive_pool(np_data, out_size, pool_type, layout)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         relay_out = intrp1.evaluate(func)(np_data)
         tvm.testing.assert_allclose(relay_out.asnumpy(), np_out, rtol=1e-5, atol=1e-5)
@@ -396,6 +407,7 @@ def verify_adaptive_pool3d(dshape, out_size, pool_type, layout="NCHW", dtype="fl
     verify_adaptive_pool(dshape, out_size, pool_type, layout, dtype, opfunc)
 
 
+@tvm.testing.uses_gpu
 def test_adaptive_pool():
     verify_adaptive_pool2d((1, 9, 224, 224), (1, 1), "max")
     verify_adaptive_pool2d((1, 3, 224, 224), (2, 3), "avg")
@@ -409,6 +421,7 @@ def test_adaptive_pool():
     verify_adaptive_pool3d((1, 16, 32, 32, 32), (2, 4, 4), "max", layout="NDHWC")
 
 
+@tvm.testing.uses_gpu
 def test_sequence_mask():
     def _verify(data_shape, mask_value, axis, dtype, itype):
         max_length = data_shape[axis]
@@ -423,7 +436,7 @@ def test_sequence_mask():
         valid_length_np = np.random.randint(0, max_length, size=nbatch).astype(itype)
         gt_out_np = tvm.topi.testing.sequence_mask(data_np, valid_length_np, mask_value, axis)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 out_relay = intrp.evaluate(func)(data_np, valid_length_np)
@@ -432,6 +445,7 @@ def test_sequence_mask():
     _verify((2, 3, 5, 3), 0.0, 0, 'float32', 'int64')
     _verify((5, 8, 3), 0.1, 1, 'float64', 'float32')
 
+@tvm.testing.uses_gpu
 def test_one_hot():
     def _get_oshape(indices_shape, depth, axis):
         oshape = []
@@ -458,7 +472,7 @@ def test_one_hot():
         indices_np = np.random.randint(0, depth, size=indices_shape).astype("int32")
         out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 out_relay = intrp.evaluate(func)(indices_np)
@@ -471,6 +485,7 @@ def test_one_hot():
     _verify((3, 2, 4, 5), 6, 1, 0, 1, "int32")
     _verify((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
 
+@tvm.testing.uses_gpu
 def test_matrix_set_diag():
     def _verify(input_shape, dtype):
         diagonal_shape = list(input_shape[:-2])
@@ -488,7 +503,7 @@ def test_matrix_set_diag():
         diagonal_np = np.random.randint(-100, 100, size=diagonal_shape).astype(dtype)
         out_np = tvm.topi.testing.matrix_set_diag(input_np, diagonal_np)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 out_relay = intrp.evaluate(func)(input_np, diagonal_np)
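
For reference, a minimal sketch of the idiom the hunks above converge on: a test marked with @tvm.testing.uses_gpu that loops over tvm.testing.enabled_targets() instead of ctx_list(). The op, shapes, test name, and tolerances here are illustrative assumptions, not code from the patch.

import numpy as np
import tvm
import tvm.testing
from tvm import relay


@tvm.testing.uses_gpu
def test_abs_sketch():
    # Hypothetical elementwise test written in the converted style.
    shape = (2, 3)
    x = relay.var("x", shape=shape, dtype="float32")
    func = relay.Function([x], relay.abs(x))
    x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
    ref_res = np.abs(x_data)
    # Only targets enabled for this test session are returned here.
    for target, ctx in tvm.testing.enabled_targets():
        for kind in ["graph", "debug"]:
            intrp = relay.create_executor(kind, ctx=ctx, target=target)
            op_res = intrp.evaluate(func)(x_data)
            tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
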
index 6258d8c..93eecfc 100644
@@ -22,12 +22,14 @@ from tvm import te
 from tvm import autotvm
 from tvm import relay
 from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 from tvm.contrib import util
 import tvm.topi.testing
 from tvm.topi.cuda.conv3d_winograd import _infer_tile_size
+import tvm.testing
 
 
+@tvm.testing.uses_gpu
 def test_conv1d_infer_type():
     # symbolic in batch dimension
     n, c, w = te.var("n"), 10, 224
@@ -78,6 +80,7 @@ def test_conv1d_infer_type():
         (n, w, 16), "int32")
 
 
+@tvm.testing.uses_gpu
 def test_conv1d_run():
     def run_test_conv1d(dtype, out_dtype, scale, dshape, kshape,
                         padding=(1, 1),
@@ -100,9 +103,10 @@ def test_conv1d_run():
         ref_res = tvm.topi.testing.conv1d_ncw_python(
             data.astype(out_dtype), kernel.astype(out_dtype), 1, padding, dilation)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if target in except_targets:
                 continue
+            ctx = tvm.context(target, 0)
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data, kernel)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
@@ -122,6 +126,7 @@ def test_conv1d_run():
                     padding=(1, 1), channels=10, kernel_size=3, dilation=3)
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_infer_type():
     # symbolic in batch dimension
     n, c, h, w = te.size_var("n"), 10, 224, 224
@@ -189,6 +194,7 @@ def test_conv2d_infer_type():
         (n, h, w, 16), "int32")
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_run():
     def run_test_conv2d(dtype, out_dtype, scale, dshape, kshape,
                         padding=(1, 1),
@@ -219,9 +225,10 @@ def test_conv2d_run():
             ref_res = fref(data.astype(out_dtype), dkernel.astype(out_dtype))
 
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if target in except_targets:
                 continue
+            ctx = tvm.context(target, 0)
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data, kernel)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-4, atol=1e-4)
@@ -314,6 +321,7 @@ def test_conv2d_run():
     run_test_conv2d("float32", "float32", 1, dshape, kshape,
                     padding=(1, 1), channels=10, kernel_size=(3 ,3), dilation=(3, 3))
 
+@tvm.testing.uses_gpu
 def test_conv2d_winograd():
     class WinogradFallback(autotvm.FallbackContext):
         def _query_inside(self, target, workload):
@@ -357,9 +365,10 @@ def test_conv2d_winograd():
             groups=groups)
 
         with WinogradFallback(), tvm.transform.PassContext(opt_level=3):
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 if target != 'cuda':
                     continue
+                ctx = tvm.context(target, 0)
                 params = {'w': tvm.nd.array(kernel)}
                 graph, lib, params = relay.build_module.build(mod, target=target, params=params)
                 module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
@@ -385,6 +394,7 @@ def test_conv2d_winograd():
                          padding=(2, 2), channels=192, kernel_size=(7, 7))
 
 
+@tvm.testing.uses_gpu
 def test_conv3d_infer_type():
     # symbolic in batch dimension
     n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224
@@ -435,6 +445,7 @@ def test_conv3d_infer_type():
         (n, d, h, w, 16), "int32")
 
 
+@tvm.testing.uses_gpu
 def test_conv3d_run():
     def run_test_conv3d(dtype, out_dtype, scale, dshape, kshape,
                         padding=(1, 1, 1),
@@ -465,9 +476,10 @@ def test_conv3d_run():
             ref_res = fref(data.astype(out_dtype), dkernel.astype(out_dtype))
 
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if target in except_targets:
                 continue
+            ctx = tvm.context(target, 0)
 
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data, kernel)
@@ -479,6 +491,7 @@ def test_conv3d_run():
     run_test_conv3d("float32", "float32", 1, dshape, kshape,
             padding=(1, 1, 1), channels=10, kernel_size=(3, 3 ,3))
 
+@tvm.testing.uses_gpu
 def test_conv3d_ndhwc_run():
     def run_test_conv3d(dtype, out_dtype, scale, dshape, kshape,
                         padding=(1, 1, 1),
@@ -509,9 +522,10 @@ def test_conv3d_ndhwc_run():
             ref_res = fref(data.astype(out_dtype), dkernel.astype(out_dtype))
 
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if target in except_targets:
                 continue
+            ctx = tvm.context(target, 0)
 
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data, kernel)
@@ -523,6 +537,7 @@ def test_conv3d_ndhwc_run():
     run_test_conv3d("float32", "float32", 1, dshape, kshape,
             padding=(1, 1, 1), channels=10, kernel_size=(3, 3 ,3), except_targets=["cuda"])
 
+@tvm.testing.uses_gpu
 def test_conv3d_winograd():
     class WinogradFallback(autotvm.FallbackContext):
         def _query_inside(self, target, workload):
@@ -579,9 +594,10 @@ def test_conv3d_winograd():
             groups=groups)
 
         with WinogradFallback(), tvm.transform.PassContext(opt_level=3):
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 if target != 'cuda':
                     continue
+                ctx = tvm.context(target, 0)
                 params = {'w': tvm.nd.array(kernel)}
                 graph, lib, params = relay.build_module.build(mod, target=target, params=params)
                 module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
@@ -612,6 +628,7 @@ def test_conv3d_winograd():
                          padding=(0, 2, 2), channels=120, kernel_size=(1, 5, 5))
 
 
+@tvm.testing.uses_gpu
 def test_conv3d_transpose_infer_type():
     # symbolic in batch dimension
     n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224
@@ -649,6 +666,7 @@ def test_conv3d_transpose_infer_type():
         (n, 12, 226, 226, 226), "int32")
 
 
+@tvm.testing.uses_gpu
 def test_conv3d_transpose_ncdhw_run():
     dshape = (1, 3, 24, 24, 24)
     kshape = (3, 4, 2, 2, 2)
@@ -665,12 +683,13 @@ def test_conv3d_transpose_ncdhw_run():
     kernel = np.random.uniform(size=kshape).astype(dtype)
     ref_res = tvm.topi.testing.conv3d_transpose_ncdhw_python(data, kernel, 1, 1, 0)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data, kernel)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_transpose_infer_type():
     # symbolic in batch dimension
     n, c, h, w = te.size_var("n"), 10, 10, 12
@@ -700,6 +719,7 @@ def test_conv2d_transpose_infer_type():
         (n, 15, 15, 11), "float32")
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_transpose_nchw_run():
     dshape = (1, 3, 18, 18)
     kshape = (3, 10, 3, 3)
@@ -716,12 +736,13 @@ def test_conv2d_transpose_nchw_run():
     ref_res = tvm.topi.testing.conv2d_transpose_nchw_python(
         data, kernel, 2, 1, (1, 1))
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data, kernel)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_transpose_nhwc_run():
     dshape_nhwc = (1, 18, 18, 3)
     kshape_hwoi = (3, 3, 10, 3)
@@ -743,12 +764,13 @@ def test_conv2d_transpose_nhwc_run():
     ref_res = tvm.topi.testing.conv2d_transpose_nhwc_python(data, kernel, 'HWOI',
                                                         2, 1, output_padding=(1, 1))
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data, kernel)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_conv1d_transpose_ncw_run():
     dshape = (1, 3, 18)
     kshape = (3, 10, 3)
@@ -765,12 +787,13 @@ def test_conv1d_transpose_ncw_run():
     ref_res = tvm.topi.testing.conv1d_transpose_ncw_python(
         data, kernel, 2, 1, output_padding=(1,))
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data, kernel)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_upsampling_infer_type():
     n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     scale = tvm.tir.const(2.0, "float64")
@@ -787,6 +810,7 @@ def test_upsampling_infer_type():
     yy = run_infer_type(y)
     assert yy.checked_type == relay.TensorType((n, c, 200, 400), "float32")
 
+@tvm.testing.uses_gpu
 def test_upsampling3d_infer_type():
     n, c, d, h, w = te.size_var("n"), te.size_var("c"),\
                     te.size_var("d"), te.size_var("h"), te.size_var("w")
@@ -820,7 +844,7 @@ def _test_pool2d(opfunc, reffunc, pool_size=(2, 2), strides=(2, 2), padding=(0,
     func = relay.Function([x], y)
     data = np.random.uniform(size=dshape).astype(dtype)
     ref_res = reffunc(data.reshape(1, 3, 14, 2, 14, 2), axis=(3, 5))
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
@@ -840,7 +864,7 @@ def _test_pool2d_int(opfunc, reffunc, dtype):
     func = relay.Function([x], y)
     data = np.random.randint(low=-128, high=128, size=dshape)
     ref_res = reffunc(data.reshape(1,3,14,2,14,2), axis=(3,5)).astype(dtype)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
@@ -865,12 +889,13 @@ def _test_global_pool2d(opfunc, reffunc):
     func = relay.Function([x], y)
     data = np.random.uniform(size=dshape).astype(dtype)
     ref_res = reffunc(data, axis=(2,3), keepdims=True)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_pool2d():
     _test_pool2d(relay.nn.max_pool2d, np.max)
     _test_pool2d(relay.nn.max_pool2d, np.max, pool_size=2, strides=2, padding=0)
@@ -882,6 +907,7 @@ def test_pool2d():
     _test_global_pool2d(relay.nn.global_avg_pool2d, np.mean)
 
 
+@tvm.testing.uses_gpu
 def test_pool1d():
 
     def _test_pool1d(opfunc, pool_size=(2,), strides=(2,), padding=(0, 0)):
@@ -901,7 +927,7 @@ def test_pool1d():
         data = np.random.uniform(size=dshape).astype(dtype)
         ref_res = tvm.topi.testing.pool1d_ncw_python(data, (2,), (2,),
                                                  (0, 0), (1, 3, 16), pool_type, False)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
@@ -912,6 +938,7 @@ def test_pool1d():
     _test_pool1d(relay.nn.avg_pool1d, pool_size=2, strides=2, padding=0)
 
 
+@tvm.testing.uses_gpu
 def test_pool3d():
 
     def _test_pool3d(opfunc,
@@ -939,7 +966,7 @@ def test_pool3d():
         data = np.random.uniform(size=dshape).astype(dtype)
         ref_res = tvm.topi.testing.pool3d_ncdhw_python(data, pool_size, strides,
                                                    padding, out_shape, pool_type, False)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
@@ -956,6 +983,7 @@ def test_pool3d():
     _test_pool3d(relay.nn.avg_pool3d, pool_size=2, padding=0, strides=2)
 
 
+@tvm.testing.uses_gpu
 def test_avg_pool2d_no_count_pad():
     kh, kw = (4, 4)
     sh, sw = (2, 2)
@@ -985,11 +1013,12 @@ def test_avg_pool2d_no_count_pad():
     ref_res = np.maximum(b_np, 0.0)
     data = a_np
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(data)
         tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_flatten_infer_type():
     d1, d2, d3, d4 = te.size_var("d1"), te.size_var("d2"), te.size_var("d3"), te.size_var("d4")
     x = relay.var("x", relay.TensorType((d1, d2, d3, d4), "float32"))
@@ -1018,7 +1047,7 @@ def test_flatten_infer_type():
     x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
     ref_res = x_data.flatten().reshape(o_shape)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(x_data)
@@ -1026,6 +1055,7 @@ def test_flatten_infer_type():
         op_res2 = intrp2.evaluate(func)(x_data)
         tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_pad_infer_type():
     # entirely concrete case
     n, c, h, w = 1, 2, 3, 4
@@ -1042,6 +1072,7 @@ def test_pad_infer_type():
     yy = run_infer_type(y)
     assert yy.checked_type == relay.TensorType((n + 2, 6, 9, w + 8), "float32")
 
+@tvm.testing.uses_gpu
 def test_pad_run():
     def _test_run(dtype):
         dshape = (4, 10, 7, 7)
@@ -1050,7 +1081,7 @@ def test_pad_run():
         func = relay.Function([x], y)
         data = np.random.uniform(size=dshape).astype(dtype)
         ref_res = np.pad(data, ((1, 1), (2, 2), (3, 3), (4, 4)), 'constant')
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
@@ -1058,6 +1089,7 @@ def test_pad_run():
     _test_run('float32')
     _test_run('int32')
 
+@tvm.testing.uses_gpu
 def test_lrn():
     n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     x = relay.var("x", shape=(n, c , h, w))
@@ -1081,7 +1113,7 @@ def test_lrn():
     x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
     ref_res = tvm.topi.testing.lrn_python(x_data, size, axis, bias, alpha, beta)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(x_data)
@@ -1089,6 +1121,7 @@ def test_lrn():
         op_res2 = intrp2.evaluate(func)(x_data)
         tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_l2_normalize():
     n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     x = relay.var("x", shape=(n, c , h, w))
@@ -1109,7 +1142,7 @@ def test_l2_normalize():
     x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
     ref_res = tvm.topi.testing.l2_normalize_python(x_data, eps, axis)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(x_data)
@@ -1126,6 +1159,7 @@ def batch_flatten(data):
     return np.reshape(data, (shape[0], target_dim))
 
 
+@tvm.testing.uses_gpu
 def test_batch_flatten():
     t1 = relay.TensorType((5, 10, 5))
     x = relay.Var("x", t1)
@@ -1133,7 +1167,7 @@ def test_batch_flatten():
 
     data = np.random.rand(5, 10, 5).astype(t1.dtype)
     ref_res = batch_flatten(data)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp = relay.create_executor("graph", ctx=ctx, target=target)
         op_res = intrp.evaluate(func)(data)
         np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
@@ -1166,12 +1200,13 @@ def _test_upsampling(layout, method, align_corners=False):
     else:
         ref = tvm.topi.testing.bilinear_resize_python(data, (int(round(h*scale_h)),
                                                   int(round(w*scale_w))), layout)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         executor = relay.create_executor("graph", ctx=ctx, target=target)
         out = executor.evaluate(func)(data)
         tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5, atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_upsampling():
     _test_upsampling("NCHW", "nearest_neighbor")
     _test_upsampling("NCHW", "bilinear", True)
@@ -1212,17 +1247,19 @@ def _test_upsampling3d(layout, method, coordinate_transformation_mode="half_pixe
         ref = tvm.topi.testing.trilinear_resize3d_python(data, (int(round(d*scale_d)),\
                                                      int(round(h*scale_h)),\
                                                      int(round(w*scale_w))), layout)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         executor = relay.create_executor("graph", ctx=ctx, target=target)
         out = executor.evaluate(func)(data)
         tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5, atol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_upsampling3d():
     _test_upsampling3d("NCDHW", "nearest_neighbor")
     _test_upsampling3d("NCDHW", "trilinear", "align_corners")
     _test_upsampling3d("NDHWC", "nearest_neighbor")
     _test_upsampling3d("NDHWC", "trilinear", "align_corners")
 
+@tvm.testing.uses_gpu
 def test_conv2d_int8_intrinsics():
     def _compile(ic, oc, target, data_layout, kernel_layout, dtypes):
         input_dtype, weight_dtype, output_dtype = dtypes
@@ -1347,6 +1384,7 @@ def test_conv2d_int8_intrinsics():
     assert "vpmulld" in asm and "vpadd" in asm
 
 
+@tvm.testing.uses_gpu
 def test_depthwise_conv2d_int8():
     input_dtype = 'uint8'
     weight_dtype = 'int8'
@@ -1376,6 +1414,7 @@ def test_depthwise_conv2d_int8():
                 graph, lib, params = relay.build(func, target, params=parameters)
 
 
+@tvm.testing.uses_gpu
 def test_bitserial_conv2d_infer_type():
     # Basic shape test with ambiguous batch.
     n, c, h, w = te.size_var("n"), 32, 224, 224
@@ -1388,6 +1427,7 @@ def test_bitserial_conv2d_infer_type():
         (n, 32, 222, 222), "int16")
 
 
+@tvm.testing.uses_gpu
 def test_bitpack_infer_type():
     # Test axis packing shape inference.
     o, i, h, w = 32, 32, 128, 128
@@ -1400,6 +1440,7 @@ def test_bitpack_infer_type():
 # TODO(@jwfromm): Need to add bitserial_conv2d & bitpack run test cases
 
 
+@tvm.testing.uses_gpu
 def test_correlation():
     def _test_correlation(data_shape, kernel_size, max_displacement, stride1, stride2, padding, is_multiply, dtype='float32'):
         data1 = relay.var("data1", relay.ty.TensorType(data_shape, dtype))
@@ -1422,7 +1463,7 @@ def test_correlation():
         data2_np = np.random.uniform(size=data_shape).astype(dtype)
         ref_res = tvm.topi.testing.correlation_nchw_python(data1_np, data2_np, kernel_size, max_displacement, stride1, stride2, padding, is_multiply)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data1_np, data2_np)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
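
The convolution hunks above also keep an except_targets escape hatch and re-derive the context after the skip. A minimal sketch of that variant, with the op, shapes, and helper name assumed for illustration:

import numpy as np
import tvm
import tvm.testing
from tvm import relay


@tvm.testing.uses_gpu
def test_relu_run_sketch():
    def run_relu(except_targets=None):
        # Hypothetical runner mirroring run_test_conv2d/run_test_conv3d above.
        except_targets = except_targets or []
        shape = (1, 3, 8, 8)
        x = relay.var("x", shape=shape, dtype="float32")
        func = relay.Function([x], relay.nn.relu(x))
        data = np.random.uniform(size=shape).astype("float32")
        ref_res = np.maximum(data, 0.0)
        for target, ctx in tvm.testing.enabled_targets():
            if target in except_targets:
                continue
            ctx = tvm.context(target, 0)
            intrp = relay.create_executor("graph", ctx=ctx, target=target)
            op_res = intrp.evaluate(func)(data)
            tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)

    run_relu()
    run_relu(except_targets=["cuda"])
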
index 745130d..940bb70 100644
@@ -23,7 +23,8 @@ from tvm import te
 from tvm import relay
 from tvm.error import TVMError
 from tvm.relay import create_executor, transform
-from tvm.relay.testing import ctx_list, check_grad, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
+import tvm.testing
 
 
 def test_zeros_ones():
@@ -199,6 +200,7 @@ def test_transpose_infer_type():
         (100, t, n), "float32")
 
 
+@tvm.testing.uses_gpu
 def test_transpose():
     def verify_transpose(dshape, axes):
         x = relay.var("x", relay.TensorType(dshape, "float32"))
@@ -208,7 +210,7 @@ def test_transpose():
         x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
         ref_res = np.transpose(x_data, axes=axes)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -250,6 +252,7 @@ def test_reshape_infer_type():
     assert yy.checked_type == relay.TensorType(
         (n, t, 2000), "float32")
 
+@tvm.testing.uses_gpu
 def test_reshape():
     def verify_reshape(shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -262,7 +265,7 @@ def test_reshape():
         check_grad(func)
         x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
         ref_res = np.reshape(x_data, oshape)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -307,6 +310,7 @@ def test_reshape_like_infer_type():
     assert zz.checked_type == relay.TensorType((1, 8, 8), "float32")
 
 
+@tvm.testing.uses_gpu
 def test_reshape_like():
     def verify_reshape_like(shape, oshape):
         x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
@@ -321,7 +325,7 @@ def test_reshape_like():
 
         func = relay.Function([x, y], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, y_data)
@@ -347,6 +351,7 @@ def test_take_infer_type():
     verify_take((d1, d2), (d3, d4, d5), (d1, d3, d4, d5), 1)
     verify_take((d1, d2, d3, d4), (d5, d6), (d1, d2, d5, d6, d4), -2)
 
+@tvm.testing.uses_gpu
 def test_take():
     def verify_take(src_shape, indices_src, axis=None, mode="clip"):
         src_dtype = "float32"
@@ -361,7 +366,7 @@ def test_take():
         np_mode = "raise" if mode == "fast" else mode
         ref_res = np.take(x_data, indices=indices_src, axis=axis, mode=np_mode)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, indices_src)
@@ -448,13 +453,14 @@ def test_full_infer_type():
     assert yy.checked_type == relay.TensorType((1, 2), "int8")
 
 
+@tvm.testing.uses_gpu
 def test_full():
     def verify_full(fill_value, src_shape, dtype):
         x = relay.var("x", relay.scalar_type(dtype))
         z = relay.full(x, src_shape, dtype)
         func = relay.Function([x], z)
         ref_res = np.full(src_shape, fill_value)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(np.array(fill_value, dtype))
@@ -481,6 +487,7 @@ def test_full_like_infer_type():
     assert yy.checked_type == relay.TensorType((n, c, h, w), "float32")
 
 
+@tvm.testing.uses_gpu
 def test_full_like():
     def verify_full_like(base, fill_value, dtype):
         x_data = np.random.uniform(low=-1, high=1, size=base).astype(dtype)
@@ -491,7 +498,7 @@ def test_full_like():
         func = relay.Function([x, y], z)
         ref_res = np.full_like(x_data, fill_value)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, np.array(fill_value, dtype))
@@ -500,6 +507,7 @@ def test_full_like():
     verify_full_like((1, 1), 44.0, "float32")
 
 
+@tvm.testing.uses_gpu
 def test_infer_type_leaky_relu():
     n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
@@ -519,7 +527,7 @@ def test_infer_type_leaky_relu():
     x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
     ref_res = np.where(x_data > 0, x_data, x_data * 0.1)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(x_data)
@@ -555,7 +563,7 @@ def verify_infer_type_prelu(data, alpha, axis, output, dtype="float32"):
     else:
         ref_res = (x_data < 0) * (x_data * a_data.reshape(1, 1, 3)) + (x_data>=0) * x_data
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(x_data, a_data)
@@ -564,6 +572,7 @@ def verify_infer_type_prelu(data, alpha, axis, output, dtype="float32"):
         tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_infer_type_prelu():
     n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     verify_infer_type_prelu((n, c, h, w), (c,), 1, (n, c, h, w))
@@ -576,6 +585,7 @@ def test_infer_type_prelu():
     verify_infer_type_prelu((1, 2, 2, 3), None, 3, (1, 2, 2, 3))
 
 
+@tvm.testing.uses_gpu
 def test_arange():
     def verify_arange(start, stop, step):
         dtype = "float32"
@@ -596,7 +606,7 @@ def test_arange():
             ref_res = np.arange(start, stop, step).astype(dtype)
 
         func = relay.Function([], x)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)()
@@ -613,6 +623,7 @@ def test_arange():
     # arange doesn't support floating point right now, see type relation
     # verify_arange(20, 1, -1.5)
 
+@tvm.testing.uses_gpu
 def test_meshgrid():
     def verify_meshgrid(lengths, indexing="ij"):
         input_vars = []
@@ -632,7 +643,7 @@ def test_meshgrid():
         # Get ref
         ref_res = np.meshgrid(*input_data, indexing=indexing)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(*input_data)
@@ -646,6 +657,7 @@ def test_meshgrid():
     # Length 0 signifies scalar.
     verify_meshgrid([3, 5, 0])
 
+@tvm.testing.uses_gpu
 def test_tile():
     def verify_tile(dshape, reps):
         x = relay.var("x", relay.TensorType(dshape, "float32"))
@@ -655,7 +667,7 @@ def test_tile():
         x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
         ref_res = np.tile(x_data, reps=reps)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -664,13 +676,14 @@ def test_tile():
     verify_tile((2, 3, 4), (1, 2))
     verify_tile((2, 3), (3, 2, 1))
 
+@tvm.testing.uses_gpu
 def test_repeat():
     def verify_repeat(dshape, repeats, axis):
         x = relay.Var("x", relay.TensorType(dshape, "float32"))
         func = relay.Function([x], relay.repeat(x, repeats, axis))
         data = np.random.uniform(size=dshape).astype("float32")
         ref_res = np.repeat(data, repeats, axis)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(data)
@@ -679,6 +692,7 @@ def test_repeat():
     verify_repeat((3, 10), 2, -1)
     verify_repeat((3, 2, 4), 3, 1)
 
+@tvm.testing.uses_gpu
 def test_stack():
     def verify_stack(dshapes, axis):
         y = []
@@ -691,7 +705,7 @@ def test_stack():
         x_data = [np.random.normal(size=shape).astype("float32") for shape in dshapes]
         ref_res = np.stack(x_data, axis=axis)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(*x_data)
@@ -702,6 +716,7 @@ def test_stack():
     verify_stack([(2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4)], -1)
 
 
+@tvm.testing.uses_gpu
 def test_reverse():
     def verify_reverse(dshape, axis):
         x = relay.var("x", relay.TensorType(dshape, "float32"))
@@ -711,7 +726,7 @@ def test_reverse():
         func = relay.Function([x], z)
         x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
         ref_res = np.flip(x_data, axis)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -721,6 +736,7 @@ def test_reverse():
     verify_reverse((2, 3, 4), -1)
 
 
+@tvm.testing.uses_gpu
 def test_reverse_sequence():
     def verify_reverse_sequence(x_data, seq_lengths, batch_axis, seq_axis, ref_res):
         seq_lengths_data = np.array(seq_lengths).astype("int32")
@@ -730,7 +746,7 @@ def test_reverse_sequence():
         assert zz.checked_type == x.type_annotation
         func = relay.Function([x], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -881,6 +897,7 @@ def test_scatter_add():
     verify_scatter_add((16, 16, 4, 5), (16, 16, 4, 5), 3)
 
 
+@tvm.testing.uses_gpu
 def test_gather():
     def verify_gather(data, axis, indices, ref_res):
         data = np.asarray(data, dtype='float32')
@@ -893,7 +910,7 @@ def test_gather():
 
         func = relay.Function([d, i], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(data, indices)
@@ -933,6 +950,7 @@ def test_gather():
                     [-0.5700, 0.1558, -0.5700, 0.1558]]])
 
 
+@tvm.testing.uses_gpu
 def test_gather_nd():
     def verify_gather_nd(xshape, yshape, y_data):
         x = relay.var("x", relay.TensorType(xshape, "float32"))
@@ -943,7 +961,7 @@ def test_gather_nd():
         x_data = np.random.uniform(size=xshape).astype("float32")
         ref_res = x_data[tuple(y_data)]
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, y_data)
@@ -981,6 +999,7 @@ def test_isinf():
     _verify_infiniteness_ops(relay.isinf, np.isinf)
 
     
+@tvm.testing.uses_gpu
 def test_unravel_index():
     def verify_unravel_index(indices, shape, dtype):
         x_data = np.array(indices).astype(dtype)
@@ -999,7 +1018,7 @@ def test_unravel_index():
 
         func = relay.Function([x, y], z)
         ref_res = np.unravel_index(x_data, y_data)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, y_data)
@@ -1017,6 +1036,7 @@ def test_unravel_index():
         # output which is inline with Tensorflow
         # verify_unravel_index([0, 1, 2, 5], [2, 2], dtype)
 
+@tvm.testing.uses_gpu
 def test_sparse_to_dense():
     def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape, xpected):
         sparse_indices_data = np.array(sparse_indices)
@@ -1037,7 +1057,7 @@ def test_sparse_to_dense():
         assert zz.checked_type == relay.ty.TensorType(output_shape, str(sparse_values_data.dtype))
 
         func = relay.Function(args, d)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 if default_value is None:
index 8e01fa2..af38264 100644
@@ -19,10 +19,12 @@ from tvm import te
 import numpy as np
 from tvm import relay
 from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 import tvm.topi.testing
+import tvm.testing
 
 
+@tvm.testing.uses_gpu
 def test_binary_op():
     def check_binary_op(opfunc, ref):
         n = te.size_var("n")
@@ -47,7 +49,7 @@ def test_binary_op():
             ref_res = ref(x_data, y_data)
             func = relay.Function([x, y], z)
 
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 intrp = relay.create_executor("graph", ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, y_data)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
@@ -56,6 +58,7 @@ def test_binary_op():
         check_binary_op(opfunc, ref)
 
 
+@tvm.testing.uses_gpu
 def test_cmp_type():
     for op, ref in ((relay.greater, np.greater),
                     (relay.greater_equal, np.greater_equal),
@@ -82,12 +85,13 @@ def test_cmp_type():
             ref_res = ref(x_data, y_data)
             func = relay.Function([x, y], z)
 
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 intrp = relay.create_executor("graph", ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, y_data)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
 
 
+@tvm.testing.uses_gpu
 def test_binary_int_broadcast_1():
     for op, ref in [(relay.right_shift, np.right_shift),
                     (relay.left_shift, np.left_shift)]:
@@ -107,11 +111,12 @@ def test_binary_int_broadcast_1():
             func = relay.Function([x, y], z)
             ref_res = ref(x_data, y_data)
 
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 intrp = relay.create_executor("graph", ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, y_data)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
 
+@tvm.testing.uses_gpu
 def test_binary_int_broadcast_2():
     for op, ref in [(relay.maximum, np.maximum),
                     (relay.minimum, np.minimum),
@@ -132,11 +137,12 @@ def test_binary_int_broadcast_2():
             func = relay.Function([x, y], z)
             ref_res = ref(x_data, y_data)
 
-            for target, ctx in ctx_list():
+            for target, ctx in tvm.testing.enabled_targets():
                 intrp = relay.create_executor("graph", ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data, y_data)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
 
+@tvm.testing.uses_gpu
 def test_where():
     shape = (3, 4)
     dtype = "float32"
@@ -152,7 +158,7 @@ def test_where():
     x = np.random.uniform(size=shape).astype(dtype)
     y = np.random.uniform(size=shape).astype(dtype)
     ref_res = np.where(condition, x, y)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "debug"]:
             intrp = relay.create_executor(kind, ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(condition, x, y)
@@ -195,7 +201,7 @@ def verify_reduce(funcs, data, axis, keepdims, exclude, output, dtype="float32")
             return
         ref_res = ref_func(x_data + 0, axis=axis, keepdims=keepdims)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(x_data)
@@ -203,6 +209,7 @@ def verify_reduce(funcs, data, axis, keepdims, exclude, output, dtype="float32")
         op_res2 = intrp2.evaluate(func)(x_data)
         tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_reduce_functions():
     def _with_keepdims(func):
         def _wrapper(data, axis=None, keepdims=False):
@@ -282,7 +289,7 @@ def verify_mean_var_std(funcs, shape, axis, keepdims):
     ref_mean = np.mean(x_data, axis=axis, dtype=dtype, keepdims=keepdims)
     ref_res = ref_func(x_data, axis=axis, dtype=dtype, keepdims=keepdims)
 
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
         intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
         op_res1 = intrp1.evaluate(func)(x_data)
@@ -292,6 +299,7 @@ def verify_mean_var_std(funcs, shape, axis, keepdims):
         tvm.testing.assert_allclose(op_res2[0].asnumpy(), ref_mean, rtol=1e-5)
         tvm.testing.assert_allclose(op_res2[1].asnumpy(), ref_res, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_mean_var_std():
     for func in [[relay.mean_variance, np.var],
                  [relay.mean_std, np.std]]:
@@ -307,6 +315,7 @@ def test_mean_var_std():
         verify_mean_var_std(func, (128, 24, 128), (0, 2), True)
 
 
+@tvm.testing.uses_gpu
 def test_strided_slice():
     def verify(dshape, begin, end, strides, output, slice_mode="end",
                attr_const=True, test_ref=True, dtype="int32"):
@@ -349,7 +358,7 @@ def test_strided_slice():
 
         if not test_ref:
             return
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp = relay.create_executor("graph", ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x_data)
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
@@ -371,6 +380,7 @@ def test_strided_slice():
     verify((3, 4, 3), [1, 0, 0], [-1, 2, 3], [1, 1, 1],
            (2, 2, 3), slice_mode="size", test_ref=True)
 
+@tvm.testing.uses_gpu
 def test_strided_set():
     def verify(dshape, begin, end, strides, vshape, test_ref=True):
         x = relay.var("x", relay.TensorType(dshape, "float32"))
@@ -394,7 +404,7 @@ def test_strided_set():
         v_data = np.random.uniform(size=vshape).astype("float32")
         ref_res = tvm.topi.testing.strided_set_python(
             x_data, v_data, begin, end, strides)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp = relay.create_executor("graph", ctx=ctx, target=target)
             op_res = intrp.evaluate(func)(x_data, v_data)
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
index 254bab5..25e9ac0 100644
@@ -22,8 +22,9 @@ import tvm
 from tvm import te
 from tvm import relay
 from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 import tvm.topi.testing
+import tvm.testing
 
 
 def test_resize_infer_type():
@@ -40,6 +41,7 @@ def test_resize_infer_type():
     zz = run_infer_type(z)
     assert zz.checked_type == relay.TensorType((n, c, 100, 200), "int8")
 
+@tvm.testing.uses_gpu
 def test_resize():
     def verify_resize(dshape, scale, method, layout, coord_trans):
         if layout == "NHWC":
@@ -61,7 +63,7 @@ def test_resize():
         assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
         func = relay.Function([x], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -87,7 +89,8 @@ def test_resize3d_infer_type():
     zz = run_infer_type(z)
     assert zz.checked_type == relay.TensorType((n, c, 10, 10, 20), "int8")
 
-def test_resize3d():
+@tvm.testing.parametrize_targets
+def test_resize3d(target, ctx):
     def verify_resize(dshape, scale, method, layout):
         if layout == "NDHWC":
             size = (dshape[1] * scale, dshape[2] * scale, dshape[3] * scale)
@@ -106,15 +109,15 @@ def test_resize3d():
         assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
         func = relay.Function([x], z)
 
-        for target, ctx in ctx_list():
-            for kind in ["graph", "debug"]:
-                intrp = relay.create_executor(kind, ctx=ctx, target=target)
-                op_res = intrp.evaluate(func)(x_data)
-                tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-4)
+        for kind in ["graph", "debug"]:
+            intrp = relay.create_executor(kind, ctx=ctx, target=target)
+            op_res = intrp.evaluate(func)(x_data)
+            tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-4)
     for method in ["trilinear", "nearest_neighbor"]:
         for layout in ["NDHWC", "NCDHW"]:
             verify_resize((1, 4, 4, 4, 4), 2, method, layout)
 
+@tvm.testing.uses_gpu
 def test_crop_and_resize():
     def verify_crop_and_resize(img_shape, boxes, box_indices, crop_size,
                                layout, method, extrapolation_value=0.0):
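
test_resize3d above switches to the per-target form: @tvm.testing.parametrize_targets hands each enabled (target, ctx) pair to the test as arguments, so the body no longer loops over targets itself. A minimal sketch of that form, with the op, shapes, and test name assumed:

import numpy as np
import tvm
import tvm.testing
from tvm import relay


@tvm.testing.parametrize_targets
def test_exp_sketch(target, ctx):
    # pytest generates one instance of this test per enabled target.
    shape = (2, 4)
    x = relay.var("x", shape=shape, dtype="float32")
    func = relay.Function([x], relay.exp(x))
    x_data = np.random.uniform(size=shape).astype("float32")
    ref_res = np.exp(x_data)
    for kind in ["graph", "debug"]:
        intrp = relay.create_executor(kind, ctx=ctx, target=target)
        op_res = intrp.evaluate(func)(x_data)
        tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-4)
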
@@ -138,7 +141,7 @@ def test_crop_and_resize():
         assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
         func = relay.Function([img, bx, bx_idx], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(image_data, boxes, box_indices)
@@ -157,6 +160,7 @@ def test_crop_and_resize():
         verify_crop_and_resize((5, 3, 255, 255), boxes_nchw, indices_nchw,
                                size_nchw, 'NCHW', method, 0.1)
 
+@tvm.testing.uses_gpu
 def test_multibox_prior():
     def get_ref_result(dshape, sizes=(1.0,),
                        ratios=(1.0,), steps=(-1.0, -1.0),
@@ -213,7 +217,7 @@ def test_multibox_prior():
         data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
         func = relay.Function([x], z)
         func = run_infer_type(func)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(data)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5)
@@ -242,6 +246,7 @@ def test_multibox_prior():
     verify_multibox_prior(x, dshape, ref_res, clip=False, check_type_only=True)
 
 
+@tvm.testing.uses_gpu
 def test_get_valid_counts():
     def verify_get_valid_counts(dshape, score_threshold, id_index, score_index):
         dtype = "float32"
@@ -271,7 +276,7 @@ def test_get_valid_counts():
         assert "score_threshold" in z.astext()
         func = relay.Function([x], z.astuple())
         func = run_infer_type(func)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp = relay.create_executor("debug", ctx=ctx, target=target)
             out = intrp.evaluate(func)(np_data)
             tvm.testing.assert_allclose(out[0].asnumpy(), np_out1, rtol=1e-3, atol=1e-04)
@@ -287,6 +292,7 @@ def test_get_valid_counts():
     verify_get_valid_counts((16, 500, 5), 0.95, -1, 0)
 
 
+@tvm.testing.uses_gpu
 def test_non_max_suppression():
     def verify_nms(x0_data, x1_data, x2_data, x3_data, dshape, ref_res,
                    ref_indices_res, iou_threshold=0.5, force_suppress=False,
@@ -319,7 +325,7 @@ def test_non_max_suppression():
         func = run_infer_type(func)
         func_indices = relay.Function([x0, x1, x2, x3], z_indices)
         func_indices = run_infer_type(func_indices)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(x0_data, x1_data, x2_data, x3_data)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5)
@@ -366,6 +372,7 @@ def test_non_max_suppression():
                np_indices_result, top_k=2)
 
 
+@tvm.testing.uses_gpu
 def test_multibox_transform_loc():
     def test_default_value():
         num_anchors = 3
@@ -408,7 +415,7 @@ def test_multibox_transform_loc():
         nms = relay.vision.non_max_suppression(mtl[0], mtl[1], mtl[0], return_indices=False)
         func = relay.Function([cls_prob, loc_pred, anchors], nms)
         func = run_infer_type(func)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(np_cls_prob, np_loc_preds,
                                             np_anchors)
@@ -450,6 +457,7 @@ def test_multibox_transform_loc():
     test_threshold()
 
 
+@tvm.testing.uses_gpu
 def test_roi_align():
     def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio):
         data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
@@ -471,7 +479,7 @@ def test_roi_align():
         ref_res = tvm.topi.testing.roi_align_nchw_python(np_data, np_rois, pooled_size=pooled_size,
                                                      spatial_scale=spatial_scale,
                                                      sample_ratio=sample_ratio)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(np_data, np_rois)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-4)
@@ -483,6 +491,7 @@ def test_roi_align():
     verify_roi_align((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2)
 
 
+@tvm.testing.uses_gpu
 def test_roi_pool():
     def verify_roi_pool(data_shape, rois_shape, pooled_size, spatial_scale):
         data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
@@ -502,7 +511,7 @@ def test_roi_pool():
         np_rois[:, 0] = np.random.randint(low = 0, high = batch, size = num_roi).astype('float32')
         ref_res = tvm.topi.testing.roi_pool_nchw_python(np_data, np_rois, pooled_size=pooled_size,
                                                      spatial_scale=spatial_scale)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(np_data, np_rois)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-4)
@@ -514,6 +523,7 @@ def test_roi_pool():
     verify_roi_pool((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5)
 
 
+@tvm.testing.uses_gpu
 def test_proposal():
     def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs):
         cls_prob = relay.var("cls_prob", relay.ty.TensorType(np_cls_prob.shape, "float32"))
@@ -526,7 +536,7 @@ def test_proposal():
         func = relay.Function([cls_prob, bbox_pred, im_info], z)
         func = run_infer_type(func)
         for target in ['llvm', 'cuda']:
-            if not tvm.runtime.enabled(target):
+            if not tvm.testing.device_enabled(target):
                 print("Skip test because %s is not enabled." % target)
                 continue
             ctx = tvm.context(target, 0)
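
When a test enumerates its own target list, as test_proposal does above, the guard becomes tvm.testing.device_enabled(target) rather than tvm.runtime.enabled(target). A minimal sketch of that guard, with the op, shapes, and test name assumed:

import numpy as np
import tvm
import tvm.testing
from tvm import relay


@tvm.testing.uses_gpu
def test_tanh_explicit_targets_sketch():
    shape = (4, 4)
    x = relay.var("x", shape=shape, dtype="float32")
    func = relay.Function([x], relay.tanh(x))
    x_data = np.random.uniform(size=shape).astype("float32")
    ref_res = np.tanh(x_data)
    for target in ["llvm", "cuda"]:
        # Skip targets that are not enabled for this test session.
        if not tvm.testing.device_enabled(target):
            print("Skip test because %s is not enabled." % target)
            continue
        ctx = tvm.context(target, 0)
        intrp = relay.create_executor("graph", ctx=ctx, target=target)
        op_res = intrp.evaluate(func)(x_data)
        tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
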
@@ -592,6 +602,7 @@ def test_yolo_reorg_infer_shape():
     verify_yolo_reorg((n, c, 20, 20), 10, (n, c*10*10, 2, 2))
     verify_yolo_reorg((n, c, h, w), 2, (n, c*2*2, idxd(h, 2), idxd(w, 2)))
 
+@tvm.testing.uses_gpu
 def test_yolo_reorg():
     def verify_yolo_reorg(shape, stride):
         x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
@@ -605,7 +616,7 @@ def test_yolo_reorg():
 
         func = relay.Function([x], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -615,6 +626,7 @@ def test_yolo_reorg():
     verify_yolo_reorg((1, 4, 6, 6), 2)
 
 
+@tvm.testing.uses_gpu
 def test_deformable_conv2d():
     def test_infer_type(batch, in_channel, size, out_channel, deformable_groups, groups):
         data_shape = (batch, in_channel, size, size)
@@ -665,7 +677,7 @@ def test_deformable_conv2d():
         kernel = np.random.uniform(size=kernel_shape).astype(dtype)
         ref_res = tvm.topi.testing.deformable_conv2d_nchw_python(data, offset, kernel, stride=(1, 1), padding=(1, 1), dilation=(1, 1), deformable_groups=deformable_groups, groups=groups)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp1 = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res1 = intrp1.evaluate(func)(data, offset, kernel)
@@ -674,6 +686,7 @@ def test_deformable_conv2d():
     test_run(2, 4, 16, 4, 4, 1)
 
 
+@tvm.testing.uses_gpu
 def test_depth_to_space():
     def verify_depth_to_space(dshape, block_size, layout, mode):
         if layout == "NHWC":
@@ -696,7 +709,7 @@ def test_depth_to_space():
         assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
         func = relay.Function([x], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -706,6 +719,7 @@ def test_depth_to_space():
             verify_depth_to_space((1, 4, 4, 4), 2, layout, mode)
 
 
+@tvm.testing.uses_gpu
 def test_space_to_depth():
     def verify_space_to_depth(dshape, block_size, layout):
         if layout == "NHWC":
@@ -728,7 +742,7 @@ def test_space_to_depth():
         assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
         func = relay.Function([x], z)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -753,6 +767,7 @@ def test_dilation2d_infer_type():
         (n, 10, 217, 217), "float32")
 
 
+@tvm.testing.uses_gpu
 def test_dilation2d_run():
     def run_test_dilation2d(indata, kernel, out,
                             dtype='float32',
@@ -777,7 +792,7 @@ def test_dilation2d_run():
                                    **attrs)
         func = relay.Function([x, w], y)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if target in except_targets:
                 continue
             intrp = relay.create_executor("graph", ctx=ctx, target=target)
@@ -844,6 +859,7 @@ def test_dilation2d_run():
                         data_layout='NHWC', kernel_layout='HWI')
 
 
+@tvm.testing.uses_gpu
 def test_affine_grid():
     def verify_affine_grid(num_batch, target_shape):
         dtype = 'float32'
@@ -857,7 +873,7 @@ def test_affine_grid():
         data_np = np.random.uniform(size=data_shape).astype(dtype)
         ref_res = tvm.topi.testing.affine_grid_python(data_np, target_shape)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp1 = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res1 = intrp1.evaluate(func)(data_np)
@@ -867,6 +883,7 @@ def test_affine_grid():
     verify_affine_grid(4, (16, 32))
 
 
+@tvm.testing.uses_gpu
 def test_grid_sample():
     def verify_grid_sample(data_shape, grid_shape):
         dtype = 'float32'
@@ -883,7 +900,7 @@ def test_grid_sample():
         grid_np = np.random.uniform(size=grid_shape, low=-1.5, high=1.5).astype(dtype)
         ref_res = tvm.topi.testing.grid_sample_nchw_python(data_np, grid_np, method='bilinear')
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp1 = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res1 = intrp1.evaluate(func)(data_np, grid_np)
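Note: the relay-level pattern applied in the hunks above boils down to the following sketch. The identity-function test body and its name are illustrative, not taken from the patch; the decorator and helper calls are the ones the patch introduces.

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    @tvm.testing.uses_gpu  # eligible for GPU nodes, but only targets in TVM_TEST_DEVICES run
    def test_identity_sketch():
        x = relay.var("x", shape=(4,), dtype="float32")
        func = relay.Function([x], x)
        x_data = np.random.uniform(size=(4,)).astype("float32")
        # enabled_targets() replaces the old ctx_list(): it yields (target, ctx)
        # pairs only for devices that are enabled at runtime.
        for target, ctx in tvm.testing.enabled_targets():
            intrp = relay.create_executor("graph", ctx=ctx, target=target)
            op_res = intrp.evaluate(func)(x_data)
            tvm.testing.assert_allclose(op_res.asnumpy(), x_data)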
index 287e80a..e683224 100644 (file)
@@ -20,8 +20,9 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import relay
-from tvm.relay.testing import ctx_list
+import tvm.testing
 
+@tvm.testing.uses_gpu
 def test_argsort():
     def verify_argsort(shape, axis, is_ascend, dtype):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -33,7 +34,7 @@ def test_argsort():
         else:
             ref_res = np.argsort(-x_data, axis=axis)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(x_data)
@@ -44,6 +45,7 @@ def test_argsort():
         verify_argsort((3, 5, 6), axis=-1, is_ascend=False, dtype=dtype)
 
 
+@tvm.testing.uses_gpu
 def test_topk():
     def verify_topk(k, axis, ret_type, is_ascend, dtype):
         shape = (20, 100)
@@ -70,7 +72,7 @@ def test_topk():
                 np_values[i, :] = np_data[i, np_indices[i, :]]
         np_indices = np_indices.astype(dtype)
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug"]:
                 intrp = relay.create_executor(kind, ctx=ctx, target=target)
                 op_res = intrp.evaluate(func)(np_data)
index 85dd2ed..0e0ab57 100644 (file)
@@ -21,8 +21,9 @@ import tvm
 from tvm import relay
 from tvm.relay import transform, analysis
 from tvm.relay.testing.temp_op_attr import TempOpAttr
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
 import numpy as np
+import tvm.testing
 
 def run_opt_pass(expr, passes):
     passes = passes if isinstance(passes, list) else [passes]
@@ -615,6 +616,7 @@ def test_alter_layout_nchw_upsamping_op():
     assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
 
 
+@tvm.testing.uses_gpu
 def test_alter_layout_strided_slice():
     """Test rewriting strided_slice during alter_iop_layout"""
     def before():
@@ -661,7 +663,7 @@ def test_alter_layout_strided_slice():
     mod_before['main'] = a
     mod_new['main'] = b
     with relay.build_config(opt_level=3):
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             for kind in ["graph", "debug", "vm"]:
                 ex_before = relay.create_executor(kind, mod=mod_before, ctx=ctx, target=target)
                 ex_new = relay.create_executor(kind, mod=mod_new, ctx=ctx, target=target)
index e957080..7a2ff55 100644 (file)
@@ -23,7 +23,7 @@ from tvm import relay
 from tvm.contrib import graph_runtime
 from tvm.relay.expr_functor import ExprMutator
 from tvm.relay import transform
-
+import tvm.testing
 
 def run_opt_pass(expr, passes):
     passes = passes if isinstance(passes, list) else [passes]
@@ -624,22 +624,33 @@ def run_unpropagatable_graph(dev, tgt):
         tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
 
 
-def test_check_run():
-    for dev, tgt in [("opencl", "opencl"), ("cuda", "cuda"),
-                 ("opencl", str(tvm.target.intel_graphics()))]:
-        if not tvm.runtime.enabled(dev):
-            print("Skip test because %s is not enabled." % dev)
-            continue
-        run_fusible_network(dev, tgt)
-        run_unpropagatable_graph(dev, tgt)
+@tvm.testing.requires_opencl
+def test_check_run_opencl():
+    dev = "opencl"
+    tgt = "opencl"
+    run_fusible_network(dev, tgt)
+    run_unpropagatable_graph(dev, tgt)
 
 
-def test_tuple_get_item():
+@tvm.testing.requires_opencl
+def test_check_run_opencl_intel():
+    dev = "opencl"
+    tgt = str(tvm.target.intel_graphics())
+    run_fusible_network(dev, tgt)
+    run_unpropagatable_graph(dev, tgt)
+
+
+@tvm.testing.requires_cuda
+def test_check_run_cuda():
     dev = "cuda"
-    if not tvm.runtime.enabled(dev):
-        print("Skip test because %s is not enabled." % dev)
-        return
+    tgt = "cuda"
+    run_fusible_network(dev, tgt)
+    run_unpropagatable_graph(dev, tgt)
 
+
+@tvm.testing.requires_cuda
+def test_tuple_get_item():
+    dev = "cuda"
     cpu_ctx = tvm.cpu(0)
     gpu_ctx = tvm.context(dev)
 
index 6b422ca..453c469 100644 (file)
@@ -20,8 +20,9 @@ from tvm import te
 from tvm import relay
 from tvm.relay import transform
 from tvm.relay.build_module import bind_params_by_name
-from tvm.relay.testing import run_infer_type, create_workload, ctx_list
+from tvm.relay.testing import run_infer_type, create_workload
 import tvm.topi.testing
+import tvm.testing
 
 def run_opt_pass(expr, opt_pass):
     assert isinstance(opt_pass, tvm.transform.Pass)
@@ -34,7 +35,7 @@ def run_opt_pass(expr, opt_pass):
 
 def verify_func(func, data, ref_res, rtol=1e-5, atol=1e-7):
     assert isinstance(data, list)
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         for kind in ["graph", "vm", "debug"]:
             mod = tvm.ir.IRModule.from_expr(func)
             intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
@@ -42,6 +43,7 @@ def verify_func(func, data, ref_res, rtol=1e-5, atol=1e-7):
             tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=rtol, atol=atol)
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_reshape():
     def verify_reshape(shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -66,6 +68,7 @@ def test_dynamic_to_static_reshape():
     verify_reshape((4, 7), (2, 7, 2), (2, 7, 2))
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_double_reshape():
     def verify_reshape(shape, newshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -90,6 +93,7 @@ def test_dynamic_to_static_double_reshape():
     verify_reshape((4, 7), (2, 7, 2))
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_quad_reshape():
     def verify_reshape(shape, newshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -116,6 +120,7 @@ def test_dynamic_to_static_quad_reshape():
     verify_reshape((4, 7), (2, 7, 2))
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_tile():
     def verify_tile(shape, reps, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -139,6 +144,7 @@ def test_dynamic_to_static_tile():
     verify_tile((4, 7), (4, 2), (16, 14))
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_topk():
     def verify_topk(k, axis, ret_type, is_ascend, dtype):
         shape = (20, 100)
@@ -173,7 +179,7 @@ def test_dynamic_to_static_topk():
         assert isinstance(zz, relay.Call)
         assert zz.op == relay.op.get("topk")
 
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             if "llvm" not in target: continue
             for kind in ["graph", "vm", "debug"]:
                 mod = tvm.ir.IRModule.from_expr(func2)
@@ -195,6 +201,7 @@ def test_dynamic_to_static_topk():
                 verify_topk(k, axis, ret_type, False, "float32")
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_broadcast_to():
     def verify_broadcast_to(shape, broadcast_shape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -219,6 +226,7 @@ def test_dynamic_to_static_broadcast_to():
     verify_broadcast_to((3, 1), (3, 3))
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_zeros_ones():
     def verify_ones_zeros(shape, dtype):
         for op, ref in [(relay.zeros, np.zeros), (relay.ones, np.ones)]:
@@ -241,6 +249,7 @@ def test_dynamic_to_static_zeros_ones():
     verify_ones_zeros((9, 8, 3, 4), 'float32')
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_resize():
     def verify_resize(shape, scale, method, layout):
         if layout == "NHWC":
@@ -275,6 +284,7 @@ def test_dynamic_to_static_resize():
             verify_resize((1, 4, 4, 4), 2, method, layout)
 
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_one_hot():
     def _verify(indices_shape, depth, on_value, off_value, axis, dtype):
         indices = relay.var("indices", relay.TensorType(indices_shape, "int32"))
@@ -302,6 +312,7 @@ def test_dynamic_to_static_one_hot():
     _verify((3, 2, 4, 5), 6, 1, 0, 1, "int32")
     _verify((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
 
+@tvm.testing.uses_gpu
 def test_dynamic_to_static_full():
     def verify_full(fill_value, fill_shape, dtype):
         x = relay.var("x", relay.scalar_type(dtype))
@@ -310,7 +321,7 @@ def test_dynamic_to_static_full():
 
         func = run_infer_type(relay.Function([x, y], z))
         func2 = run_opt_pass(run_opt_pass(func, transform.DynamicToStatic()), transform.InferType())
-        
+
         zz = func2.body
         assert isinstance(zz, relay.Call)
         assert zz.op == relay.op.get("full")
@@ -318,7 +329,7 @@ def test_dynamic_to_static_full():
         ref_res = np.full(fill_shape, fill_value).astype(dtype)
         y_data = np.random.uniform(low=-1, high=1, size=fill_shape).astype('int64')
         verify_func(func2, [fill_value, y_data], ref_res)
-    
+
     verify_full(4, (1, 2, 3, 4), 'int32')
     verify_full(4.0, (1, 2, 8, 10), 'float32')
 
index 90e80d8..df30eb4 100644 (file)
@@ -19,6 +19,7 @@ from tvm import te
 from tvm import relay
 from tvm.relay import transform
 from tvm.relay.testing import run_opt_pass
+import tvm.testing
 
 
 def test_fuse_simple():
@@ -704,6 +705,7 @@ def test_fuse_gather_nd():
     assert tvm.ir.structural_equal(m["main"], after)
 
 
+@tvm.testing.uses_gpu
 def test_fuse_bcast_reduce_scalar():
     """Test fusion case with broadcast and reduction involving scalar"""
 
@@ -726,7 +728,7 @@ def test_fuse_bcast_reduce_scalar():
 
     orig = before()
     m = fuse2(tvm.IRModule.from_expr(orig))
-    for tgt, _ in tvm.relay.testing.config.ctx_list():
+    for tgt, ctx in tvm.testing.enabled_targets():
         relay.build(m, tgt)
     after = run_opt_pass(expected(), transform.InferType())
     assert tvm.ir.structural_equal(m["main"], after)
index 377164e..4a09e4e 100644 (file)
@@ -20,6 +20,7 @@ import tvm
 from tvm import relay
 from tvm.relay import create_executor, transform
 from tvm.relay.testing import rand, run_infer_type
+import tvm.testing
 from tvm.testing import assert_allclose
 import pytest
 
index 25299ca..9245bbd 100644 (file)
@@ -25,7 +25,8 @@ from tvm.relay import ExprFunctor
 from tvm.relay import Function, Call
 from tvm.relay import analysis
 from tvm.relay import transform as _transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
+import tvm.testing
 
 
 def get_var_func():
@@ -114,6 +115,7 @@ def check_func(func, ref_func):
     assert tvm.ir.structural_equal(func, ref_func)
 
 
+@tvm.testing.uses_gpu
 def test_module_pass():
     shape = (5, 10)
     dtype = 'float32'
@@ -178,7 +180,7 @@ def test_module_pass():
         x_nd = get_rand(shape, dtype)
         y_nd = get_rand(shape, dtype)
         ref_res = x_nd.asnumpy() + y_nd.asnumpy()
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             exe1 = relay.create_executor("graph", ctx=ctx, target=target)
             exe2 = relay.create_executor("debug", ctx=ctx, target=target)
             res1 = exe1.evaluate(new_add)(x_nd, y_nd)
@@ -214,6 +216,7 @@ def test_function_class_pass():
     assert tvm.ir.structural_equal(mod["main"], mod2["main"])
 
 
+@tvm.testing.uses_gpu
 def test_function_pass():
     shape = (10, )
     dtype = 'float32'
@@ -271,7 +274,7 @@ def test_function_pass():
         # Execute the add function.
         x_nd = get_rand(shape, dtype)
         ref_res = np.log(x_nd.asnumpy() * 2)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             exe1 = relay.create_executor("graph", ctx=ctx, target=target)
             exe2 = relay.create_executor("debug", ctx=ctx, target=target)
             res1 = exe1.evaluate(new_log)(x_nd)
@@ -314,6 +317,7 @@ def test_pass_info():
     assert info.name == "xyz"
 
 
+@tvm.testing.uses_gpu
 def test_sequential_pass():
     shape = (10, )
     dtype = 'float32'
@@ -433,7 +437,7 @@ def test_sequential_pass():
         x_nd = get_rand(shape, dtype)
         y_nd = get_rand(shape, dtype)
         ref_res = np.subtract(x_nd.asnumpy() * 2, y_nd.asnumpy() * 2)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             exe1 = relay.create_executor("graph", ctx=ctx, target=target)
             exe2 = relay.create_executor("debug", ctx=ctx, target=target)
             res1 = exe1.evaluate(new_sub)(x_nd, y_nd)
@@ -444,7 +448,7 @@ def test_sequential_pass():
         # Execute the updated abs function.
         x_nd = get_rand((5, 10), dtype)
         ref_res = np.abs(x_nd.asnumpy() * 2)
-        for target, ctx in ctx_list():
+        for target, ctx in tvm.testing.enabled_targets():
             exe1 = relay.create_executor("graph", ctx=ctx, target=target)
             exe2 = relay.create_executor("debug", ctx=ctx, target=target)
             res1 = exe1.evaluate(new_abs)(x_nd)
index a69f928..710025a 100644 (file)
@@ -21,10 +21,10 @@ import tvm
 from tvm import runtime
 from tvm import relay
 from tvm.relay.scope_builder import ScopeBuilder
-from tvm.relay.testing.config import ctx_list
 from tvm.relay.prelude import Prelude
 from tvm.relay.loops import while_loop
 from tvm.relay import testing
+import tvm.testing
 
 def check_result(args, expected_result, mod=None):
     """
@@ -41,7 +41,7 @@ def check_result(args, expected_result, mod=None):
     """
     # TODO(@zhiics, @icemelon9): Disable the gpu test for now until the heterogeneous support
     #   is ready
-    for target, ctx in ctx_list():
+    for target, ctx in tvm.testing.enabled_targets():
         if "cuda" in target:
             continue
         vm = relay.create_executor('vm', ctx=ctx, target=target, mod=mod)
@@ -91,6 +91,7 @@ def test_split_no_fuse():
     res = veval(f, x_data)
     tvm.testing.assert_allclose(res.asnumpy(), np.split(x_data, 3, axis=0)[0])
 
+@tvm.testing.uses_gpu
 def test_id():
     x = relay.var('x', shape=(10, 10), dtype='float64')
     f = relay.Function([x], x)
@@ -99,6 +100,7 @@ def test_id():
     mod["main"] = f
     check_result([x_data], x_data, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_op():
     x = relay.var('x', shape=(10, 10))
     f = relay.Function([x], x + x)
@@ -111,6 +113,7 @@ def any(x):
     x = relay.op.nn.batch_flatten(x)
     return relay.op.min(x, axis=[0, 1])
 
+@tvm.testing.uses_gpu
 def test_cond():
     x = relay.var('x', shape=(10, 10))
     y = relay.var('y', shape=(10, 10))
@@ -127,6 +130,7 @@ def test_cond():
     # diff
     check_result([x_data, y_data], False, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_simple_if():
     x = relay.var('x', shape=(10, 10))
     y = relay.var('y', shape=(10, 10))
@@ -162,6 +166,7 @@ def test_multiple_ifs():
     res = vmobj_to_list(vm.evaluate()(False))
     assert(res == [1, 0])
 
+@tvm.testing.uses_gpu
 def test_simple_call():
     mod = tvm.IRModule({})
     sum_up = relay.GlobalVar('sum_up')
@@ -175,6 +180,7 @@ def test_simple_call():
     mod["main"] = relay.Function([iarg], sum_up(iarg))
     check_result([i_data], i_data, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_count_loop():
     mod = tvm.IRModule({})
     sum_up = relay.GlobalVar('sum_up')
@@ -195,6 +201,7 @@ def test_count_loop():
     tvm.testing.assert_allclose(result.asnumpy(), i_data)
     check_result([i_data], i_data, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_sum_loop():
     mod = tvm.IRModule({})
     sum_up = relay.GlobalVar('sum_up')
@@ -217,6 +224,7 @@ def test_sum_loop():
     mod["main"] = relay.Function([iarg, aarg], sum_up(iarg, aarg))
     check_result([i_data, accum_data], sum(range(1, loop_bound + 1)), mod=mod)
 
+@tvm.testing.uses_gpu
 def test_tuple_fst():
     ttype = relay.TupleType([relay.TensorType((1,)), relay.TensorType((10,))])
     tup = relay.var('tup', type_annotation=ttype)
@@ -227,6 +235,7 @@ def test_tuple_fst():
     mod["main"] = f
     check_result([(i_data, j_data)], i_data, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_tuple_second():
     ttype = relay.TupleType([relay.TensorType((1,)), relay.TensorType((10,))])
     tup = relay.var('tup', type_annotation=ttype)
@@ -259,6 +268,7 @@ def test_list_constructor():
     obj = vmobj_to_list(result)
     tvm.testing.assert_allclose(obj, np.array([3,2,1]))
 
+@tvm.testing.uses_gpu
 def test_let_tensor():
     sb = relay.ScopeBuilder()
     shape = (1,)
@@ -277,6 +287,7 @@ def test_let_tensor():
     mod["main"] = f
     check_result([x_data], x_data + 42.0, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_let_scalar():
     sb = relay.ScopeBuilder()
 
@@ -545,6 +556,7 @@ def test_closure():
     res = veval(main)
     tvm.testing.assert_allclose(res.asnumpy(), 3.0)
 
+@tvm.testing.uses_gpu
 def test_add_op_scalar():
     """
     test_add_op_scalar:
@@ -561,6 +573,7 @@ def test_add_op_scalar():
     mod["main"] = func
     check_result([x_data, y_data], x_data + y_data, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_add_op_tensor():
     """
     test_add_op_tensor:
@@ -577,6 +590,7 @@ def test_add_op_tensor():
     mod["main"] = func
     check_result([x_data, y_data], x_data + y_data, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_add_op_broadcast():
     """
     test_add_op_broadcast:
@@ -608,6 +622,7 @@ def test_vm_optimize():
     comp = relay.vm.VMCompiler()
     opt_mod, _ = comp.optimize(mod, target="llvm", params=params)
 
+@tvm.testing.uses_gpu
 def test_loop_free_var():
     x = relay.var('x', shape=(), dtype='int32')
     i = relay.var('i', shape=(), dtype='int32')
@@ -634,6 +649,7 @@ def test_loop_free_var():
         mod["main"] = relay.Function(relay.analysis.free_vars(ret), ret)
         check_result(args, expected, mod=mod)
 
+@tvm.testing.uses_gpu
 def test_vm_reshape_tensor():
     x_np = np.random.uniform(size=(8, 16)).astype("float32")
     x = relay.var("x", shape=(8, 16), dtype="float32")
index 735072c..d63251e 100644 (file)
 # under the License.
 """Common utility for topi test"""
 
-import tvm
-from tvm import te
 from tvm import autotvm
 from tvm.autotvm.task.space import FallbackConfigEntity
-from tvm import topi
-
-def get_all_backend():
-    """return all supported target
-
-    Returns
-    -------
-    targets: list
-        A list of all supported targets
-    """
-    return ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx',
-            'llvm -device=arm_cpu', 'opencl -device=mali', 'aocl_sw_emu']
 
 class Int8Fallback(autotvm.FallbackContext):
     def _query_inside(self, target, workload):
index 9af30f9..8e69a76 100644 (file)
 import tvm
 from tvm import te
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 import numpy as np
 from tvm.contrib.pickle_memoize import memoize
 
-from common import get_all_backend
 
 def verify_fifo_buffer(buffer_shape, data_shape, axis, dtype='float32'):
     buffer = te.placeholder(buffer_shape, name='buffer', dtype=dtype)
@@ -46,11 +46,7 @@ def verify_fifo_buffer(buffer_shape, data_shape, axis, dtype='float32'):
     # Get the test data
     buffer_np, data_np, out_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print('  Skip because %s is not enabled' % device)
-            return
+    def check_device(device, ctx):
         print('  Running on target: {}'.format(device))
 
         with tvm.target.create(device):
@@ -64,8 +60,8 @@ def verify_fifo_buffer(buffer_shape, data_shape, axis, dtype='float32'):
         f(data_tvm, buffer_tvm, out_tvm)
         tvm.testing.assert_allclose(out_tvm.asnumpy(), out_np)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_conv1d_integration():
     batch_size = 1
@@ -122,11 +118,7 @@ def verify_conv1d_integration():
     # Get the test data
     inc_input_np, input_window_np, kernel_np, context_np, output_window_np = get_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print('  Skip because %s is not enabled' % device)
-            return
+    def check_device(device, ctx):
         print('  Running on target: {}'.format(device))
 
         conv2d_nchw, schedule_conv2d_nchw = tvm.topi.testing.get_conv2d_nchw_implement(device)
@@ -184,9 +176,10 @@ def verify_conv1d_integration():
             tvm.testing.assert_allclose(output_window_tvm.asnumpy(),
                                         output_window_ref_tvm.asnumpy())
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_fifo_buffer():
     for ndim in [1, 2, 3, 4, 5, 6]:
         for axis in range(ndim):
@@ -196,6 +189,7 @@ def test_fifo_buffer():
                   .format(buffer_shape, data_shape, axis))
             verify_fifo_buffer(buffer_shape, data_shape, axis)
 
+@tvm.testing.uses_gpu
 def test_conv1d_integration():
     print('Testing FIFO buffer with 1D convolution')
     verify_conv1d_integration()
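Note: in the topi tests, the same refactor shows up as check_device(device) gaining a ctx parameter and the get_all_backend() loop becoming tvm.testing.enabled_targets(). A condensed sketch of that shape, using a hypothetical ReLU workload rather than the FIFO buffer above:

    import numpy as np
    import tvm
    import tvm.testing
    import tvm.topi.testing
    from tvm import te, topi

    @tvm.testing.uses_gpu
    def test_relu_sketch():
        A = te.placeholder((16,), name="A")
        B = topi.nn.relu(A)
        a_np = np.random.uniform(-1, 1, size=(16,)).astype(A.dtype)
        b_np = np.maximum(a_np, 0.0)

        def check_device(device, ctx):
            # ctx now arrives from enabled_targets(); no ctx.exist guard is needed.
            print("Running on target: %s" % device)
            with tvm.target.create(device):
                s = tvm.topi.testing.get_injective_schedule(device)(B)
            a = tvm.nd.array(a_np, ctx)
            b = tvm.nd.array(np.zeros_like(b_np), ctx)
            f = tvm.build(s, [A, B], device)
            f(a, b)
            tvm.testing.assert_allclose(b.asnumpy(), b_np)

        for device, ctx in tvm.testing.enabled_targets():
            check_device(device, ctx)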
index c8cddb6..c785c6d 100644 (file)
@@ -23,7 +23,7 @@ import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
 from tvm.contrib.pickle_memoize import memoize
 
-from common import get_all_backend
+import tvm.testing
 
 _batch_matmul_implement = {
     "generic": (topi.nn.batch_matmul, topi.generic.schedule_batch_matmul),
@@ -46,11 +46,7 @@ def verify_batch_matmul(batch, M, N, K):
     # get the test data
     a_np, b_np, c_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _batch_matmul_implement)
@@ -63,9 +59,10 @@ def verify_batch_matmul(batch, M, N, K):
         f(a, b, c)
         tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_batch_matmul():
     verify_batch_matmul(1, 16, 16, 32)
     verify_batch_matmul(5, 16, 16, 32)
index 4ac985e..9826b88 100644 (file)
@@ -20,7 +20,6 @@ import tvm
 from tvm import te
 from tvm import topi
 import tvm.topi.testing
-from common import get_all_backend
 
 
 def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
@@ -30,7 +29,7 @@ def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -44,7 +43,7 @@ def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
         foo(data_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for target in get_all_backend():
+    for target, ctx in tvm.testing.enabled_targets():
         check_device(target)
     check_device("sdaccel")
 
@@ -78,7 +77,7 @@ def verify_broadcast_binary_ele(lhs_shape, rhs_shape,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -94,11 +93,12 @@ def verify_broadcast_binary_ele(lhs_shape, rhs_shape,
         foo(lhs_nd, rhs_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
 
-    for target in get_all_backend():
+    for target, ctx in tvm.testing.enabled_targets():
         check_device(target)
     check_device("sdaccel")
 
 
+@tvm.testing.uses_gpu
 def test_broadcast_to():
     verify_broadcast_to_ele((1,), (10,), topi.broadcast_to)
     verify_broadcast_to_ele((), (10,), topi.broadcast_to)
@@ -106,6 +106,7 @@ def test_broadcast_to():
     verify_broadcast_to_ele((1, 128, 1, 32), (64, 128, 64, 32), topi.broadcast_to)
 
 
+@tvm.testing.uses_gpu
 def test_add():
     verify_broadcast_binary_ele(
         (), (), topi.add, np.add)
@@ -113,6 +114,7 @@ def test_add():
         (5, 2, 3), (2, 1), topi.add, np.add)
 
 
+@tvm.testing.uses_gpu
 def test_subtract():
     verify_broadcast_binary_ele(
         (5, 2, 3), (), topi.subtract, np.subtract)
@@ -124,11 +126,13 @@ def test_subtract():
         (1, 32), (64, 32), topi.subtract, np.subtract)
 
 
+@tvm.testing.uses_gpu
 def test_multiply():
     verify_broadcast_binary_ele(
         (5, 64, 128), (2, 5, 64, 1), topi.multiply, np.multiply)
 
 
+@tvm.testing.uses_gpu
 def test_divide():
     verify_broadcast_binary_ele(
         None, (10,), topi.divide, np.divide, rhs_min=0.0001)
@@ -137,6 +141,7 @@ def test_divide():
     verify_broadcast_binary_ele(
         (2, 3, 1, 32), (64, 32), topi.divide, np.divide, rhs_min=0.0001)
 
+@tvm.testing.uses_gpu
 def test_floor_divide():
     def _canonical_floor_div(a,b):
         return np.floor(a / b)
@@ -147,6 +152,7 @@ def test_floor_divide():
     verify_broadcast_binary_ele(
         (2, 3, 64, 32), (64, 32), topi.floor_divide, _canonical_floor_div, rhs_min=0.0001)
 
+@tvm.testing.uses_gpu
 def test_maximum_minmum():
     verify_broadcast_binary_ele(
         (32,), (64, 32), topi.maximum, np.maximum)
@@ -154,15 +160,18 @@ def test_maximum_minmum():
         (1, 2, 2, 1, 32), (64, 32), topi.minimum, np.minimum)
 
 
+@tvm.testing.uses_gpu
 def test_power():
     verify_broadcast_binary_ele(
         (1, 2, 2), (2,), topi.power, np.power, lhs_min=0.001, rhs_min=0.001, rhs_max=2)
 
 
+@tvm.testing.uses_gpu
 def test_mod():
     verify_broadcast_binary_ele(
         (1, 2, 2), (2,), topi.mod, np.mod, lhs_min=0.001, rhs_min=1, dtype="int32")
 
+@tvm.testing.uses_gpu
 def test_floor_mod():
     def _canonical_floor_mod(a,b):
         return a - np.floor(a / b) * b
@@ -171,6 +180,7 @@ def test_floor_mod():
     verify_broadcast_binary_ele(
         (3, 4, 5), (3, 4, 5), topi.floor_mod, _canonical_floor_mod, lhs_min=0.001, rhs_min=1, dtype="float32")
 
+@tvm.testing.uses_gpu
 def test_cmp():
     # explicit specify the output type
     def greater(x, y):
@@ -208,6 +218,7 @@ def test_cmp():
         lhs_min=-3, lhs_max=3, rhs_min=-3, rhs_max=3, dtype='int32')
 
 
+@tvm.testing.uses_gpu
 def test_shift():
     # explicit specify the output type
     verify_broadcast_binary_ele(
@@ -223,6 +234,7 @@ def test_shift():
         dtype="int8", rhs_min=0, rhs_max=32)
 
 
+@tvm.testing.uses_gpu
 def test_logical_single_ele():
     def test_apply(
             func,
@@ -238,11 +250,7 @@ def test_logical_single_ele():
             assert (isinstance(B, tvm.tir.PrimExpr))
             return
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_broadcast_schedule(device)(B)
@@ -256,13 +264,14 @@ def test_logical_single_ele():
             foo(data_nd, out_nd)
             tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-        for device in get_all_backend():
-            check_device(device)
+        for device, ctx in tvm.testing.enabled_targets():
+            check_device(device, ctx)
 
     test_apply(topi.logical_not, "logical_not", np.logical_not, np.array([True, False, 0, 1]))
     test_apply(topi.logical_not, "logical_not", np.logical_not, np.array(np.arange(5) < 3))
 
 
+@tvm.testing.uses_gpu
 def test_bitwise_not():
     def test_apply(
             func,
@@ -279,11 +288,7 @@ def test_bitwise_not():
             assert (isinstance(B, tvm.tir.PrimExpr))
             return
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_broadcast_schedule(device)(B)
@@ -297,13 +302,14 @@ def test_bitwise_not():
             foo(data_nd, out_nd)
             tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-        for device in get_all_backend():
-            check_device(device)
+        for device, ctx in tvm.testing.enabled_targets():
+            check_device(device, ctx)
 
     test_apply(topi.bitwise_not, "bitwise_not", np.bitwise_not, ())
     test_apply(topi.bitwise_not, "bitwise_not", np.bitwise_not, (2, 1, 2))
 
 
+@tvm.testing.uses_gpu
 def test_logical_binary_ele():
     def test_apply(
             func,
@@ -321,11 +327,7 @@ def test_logical_binary_ele():
             assert (isinstance(C, tvm.tir.PrimExpr))
             return
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_broadcast_schedule(device)(C)
@@ -339,8 +341,8 @@ def test_logical_binary_ele():
             foo(lhs_nd, rhs_nd, out_nd)
             tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
 
-        for device in get_all_backend():
-            check_device(device)
+        for device, ctx in tvm.testing.enabled_targets():
+            check_device(device, ctx)
 
     test_apply(topi.logical_and, "logical_and", np.logical_and, True, False)
     test_apply(topi.logical_and, "logical_and", np.logical_and, [True, False], [False, False])
@@ -350,6 +352,7 @@ def test_logical_binary_ele():
     test_apply(topi.logical_xor, "logical_xor", np.logical_xor, [True, False], [False, False])
 
 
+@tvm.testing.uses_gpu
 def test_bitwise_and():
     verify_broadcast_binary_ele(
         None, None, topi.bitwise_and, np.bitwise_and,
@@ -359,6 +362,7 @@ def test_bitwise_and():
         dtype="int32")
 
 
+@tvm.testing.uses_gpu
 def test_bitwise_or():
     verify_broadcast_binary_ele(
         None, None, topi.bitwise_or, np.bitwise_or,
@@ -368,6 +372,7 @@ def test_bitwise_or():
         dtype="int32")
 
 
+@tvm.testing.uses_gpu
 def test_bitwise_xor():
     verify_broadcast_binary_ele(
         None, None, topi.bitwise_xor, np.bitwise_xor,
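Note: tests that keep an explicit device list (for example to also probe "sdaccel") switch from ctx.exist or tvm.runtime.enabled() to tvm.testing.device_enabled(), which also respects TVM_TEST_DEVICES. Roughly, with an illustrative helper name:

    import tvm
    import tvm.testing

    def get_ctx_if_enabled(device):
        # device_enabled() is false for devices left out of TVM_TEST_DEVICES,
        # even if TVM was built with support for them.
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return None
        return tvm.context(device, 0)

    if __name__ == "__main__":
        for device in ["llvm", "cuda", "sdaccel"]:
            ctx = get_ctx_if_enabled(device)
            if ctx is not None:
                print("Running on target: %s" % device)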
index b3d95dd..70af1f8 100644 (file)
@@ -19,11 +19,11 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
 from tvm.contrib.pickle_memoize import memoize
 
-from common import get_all_backend
 
 def verify_clip(N, a_min, a_max, dtype):
     A = te.placeholder((N, N), dtype=dtype, name='A')
@@ -38,11 +38,7 @@ def verify_clip(N, a_min, a_max, dtype):
         return a_np, b_np
     a_np, b_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -53,9 +49,10 @@ def verify_clip(N, a_min, a_max, dtype):
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_clip():
     verify_clip(1024, -127, 127, 'float32')
     verify_clip(1024, -127, 127, 'int16')
index 49f2cd1..b50aa56 100644 (file)
@@ -23,7 +23,6 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
-from common import get_all_backend
 
 
 _conv1d_ncw_implement = {
@@ -74,11 +73,7 @@ def verify_conv1d(batch,
 
     a_np, w_np, b_np = get_ref_data(layout)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         if layout == "NCW":
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_ncw_implement)
         else:
@@ -95,10 +90,11 @@ def verify_conv1d(batch,
         func(a, w, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_conv1d():
     for layout in ["NCW", "NWC"]:
         # Most basic test case
index 7efa96d..fc5819b 100644 (file)
@@ -23,7 +23,7 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
-from common import get_all_backend
+import tvm.testing
 
 _conv1d_transpose_ncw_implement = {
     "generic": (topi.nn.conv1d_transpose_ncw, topi.generic.schedule_conv1d_transpose_ncw),
@@ -49,11 +49,8 @@ def verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel,
 
     a_np, w_np, b_np, c_np = get_ref_data()
 
-    def check_device(device):
+    def check_device(device, ctx):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
         with tvm.target.create(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_transpose_ncw_implement)
             B = fcompute(A, W, stride, padding, A.dtype, output_padding)
@@ -72,10 +69,11 @@ def verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel,
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
         tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_conv1d_transpose_ncw():
     verify_conv1d_transpose_ncw(1, 3, 224, 32, 5, 1, 0, (0,))
     verify_conv1d_transpose_ncw(1, 3, 224, 32, 7, 1, 2, (0,))
index 95d5633..604d09d 100644 (file)
@@ -21,13 +21,12 @@ import tvm
 from tvm import te
 from tvm import autotvm
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.nn.util import get_pad_tuple
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
-
 def _transform_data(data, bn):
     # NCHW -> NCHW[x]c
     batch_size, channel, height, width = data.shape
@@ -94,7 +93,7 @@ def verify_conv2d_NCHWc(batch, in_channel, in_size, num_filter, kernel, stride,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
index 20b1b4d..04f34b6 100644 (file)
@@ -23,6 +23,7 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
+import tvm.testing
 
 
 _conv2d_hwcn_implement = {
@@ -58,7 +59,7 @@ def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, p
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -94,6 +95,7 @@ def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, p
         check_device(device)
 
 
+@tvm.testing.requires_gpu
 def test_conv2d_hwcn():
     verify_conv2d_hwcn(1, 256, 32, 256, 3, 1, "SAME")
     verify_conv2d_hwcn(1, 256, 32, 256, 3, 1, "SAME")
index 2c071c9..ea1aee1 100644 (file)
@@ -32,7 +32,7 @@ _conv2d_hwnc_tensorcore_implement = {
 }
 
 def verify_conv2d_hwnc(batch, in_channel, in_size, num_filter, kernel, stride,
-                       padding, dilation=1, devices='cuda', dtype='int4'):
+                       padding, dilation=1, dtype='int4'):
     """Test the conv2d with tensorcore for hwnc layout"""
     pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (kernel, kernel))
     padding_sum = pad_top + pad_left + pad_bottom + pad_right
@@ -89,7 +89,7 @@ def verify_conv2d_hwnc(batch, in_channel, in_size, num_filter, kernel, stride,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         if not nvcc.have_tensorcore(ctx.compute_version):
@@ -112,9 +112,10 @@ def verify_conv2d_hwnc(batch, in_channel, in_size, num_filter, kernel, stride,
         rtol = 1e-3
         tvm.testing.assert_allclose(c.asnumpy().transpose((2, 0, 1, 3)), c_np, rtol=rtol)
 
-    check_device(devices)
+    check_device('cuda')
 
 
+@tvm.testing.requires_tensorcore
 def test_conv2d_hwnc_tensorcore():
     """Test the conv2d with tensorcore for hwnc layout"""
     verify_conv2d_hwnc(8, 64, 56, 64, 3, 1, 1, dtype='int8')
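Note: tests that need specific hardware beyond a generic GPU drop their hand-rolled guards in favour of the stricter decorators, as in the tensorcore hunks above and below. A sketch with an illustrative test body:

    import tvm
    import tvm.testing
    from tvm.contrib import nvcc

    @tvm.testing.requires_tensorcore
    def test_tensorcore_sketch():
        # The decorator skips the test unless CUDA is enabled and the current GPU
        # reports Tensor Core support, replacing manual have_tensorcore() checks.
        ctx = tvm.gpu(0)
        assert nvcc.have_tensorcore(ctx.compute_version)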
index 615dc51..c18946b 100644 (file)
@@ -28,7 +28,8 @@ from tvm.topi.nn.util import get_pad_tuple
 from tvm.topi.util import get_const_tuple
 from tvm.topi.arm_cpu.conv2d_gemm import is_aarch64_arm
 
-from common import get_all_backend, Int8Fallback
+from common import Int8Fallback
+import tvm.testing
 
 def compile_conv2d_NHWC_gemm_int8_arm(batch, in_channel, in_size, num_filter, kernel, stride, padding,
                                  dilation=1, add_bias=False, add_relu=False):
@@ -45,7 +46,7 @@ def compile_conv2d_NHWC_gemm_int8_arm(batch, in_channel, in_size, num_filter, ke
     device = "llvm --device arm_cpu --mtriple aarch64-linux-gnu"
 
     ctx = tvm.context(device, 0)
-    if not ctx.exist:
+    if not tvm.testing.device_enabled(device):
         print("Skip because %s is not enabled" % device)
         return
     print("Compiling on arm AArch64 target: %s" % device)
@@ -128,7 +129,7 @@ def verify_conv2d_NHWC_gemm_int8(batch, in_channel, in_size, num_filter, kernel,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -223,7 +224,7 @@ def verify_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kernel, str
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
@@ -293,7 +294,7 @@ def verify_conv2d_nchw_int8(batch, in_channel, in_size, num_filter, kernel, stri
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
@@ -327,6 +328,7 @@ def verify_conv2d_nchw_int8(batch, in_channel, in_size, num_filter, kernel, stri
         check_device(device)
 
 
+@tvm.testing.requires_cuda
 def test_conv2d_nchw():
     with Int8Fallback():
         # ResNet18 workloads where channels in / out are multiple of oc_block_factor
index dcdf0a7..a306e3e 100644 (file)
@@ -26,7 +26,7 @@ from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.nn.util import get_pad_tuple
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
+import tvm.testing
 
 def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, add_bias=False, add_relu=False,\
         use_cudnn=False):
@@ -63,7 +63,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -97,7 +97,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
             func(a, w, c)
         tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)
 
-    for device in get_all_backend():
+    for device, ctx in tvm.testing.enabled_targets():
         with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
             check_device(device)
 
@@ -105,6 +105,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
         check_device("cuda -model=unknown -libs=cudnn")
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_nchw():
     # ResNet18 workloads
     verify_conv2d_nchw(1,   3, 224,  64, 7, 2, 3)
index 7750f23..29b8634 100644 (file)
@@ -23,7 +23,7 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
-
+import tvm.testing
 
 
 _conv2d_nhwc_implement = {
@@ -56,7 +56,7 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, p
     a_np, w_np, b_np = get_ref_data()
 
     def check_device(device):
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -76,6 +76,7 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, p
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_nhwc():
     verify_conv2d_nhwc(1, 256, 32, 256, 3, 1, "SAME")
     verify_conv2d_nhwc(4, 128, 16, 128, 5, 2, "SAME")
index 4439d6a..019dd30 100644 (file)
@@ -51,7 +51,7 @@ def verify_conv2d_1x1_nhwc_pack_int8(batch, in_channel, in_size, num_filter, ker
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
index 8375df3..fb0167a 100644 (file)
@@ -26,6 +26,7 @@ from tvm.contrib.pickle_memoize import memoize
 from tvm.contrib import nvcc
 from tvm.topi.nn.util import get_pad_tuple
 from tvm.topi.util import get_const_tuple
+import tvm.testing
 
 
 _conv2d_nhwc_tensorcore_implement = {
@@ -70,7 +71,7 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         if not nvcc.have_tensorcore(ctx.compute_version):
@@ -105,6 +106,8 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride,
     check_device(devices)
 
 
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
 def test_conv2d_nhwc_tensorcore():
     """Test the conv2d with tensorcore for nhwc layout"""
     verify_conv2d_nhwc(16, 16, 14, 16, 3, 1, 1)
index 00b40bf..cbcc32d 100644 (file)
@@ -24,9 +24,9 @@ from tvm import topi
 import tvm.topi.testing
 from tvm import te
 from tvm.contrib.pickle_memoize import memoize
-from tvm.contrib import nvcc
 from tvm.topi.nn.util import get_pad_tuple
 from tvm.topi.util import get_const_tuple
+import tvm.testing
 
 
 _conv2d_nhwc_winograd_tensorcore = {
@@ -78,9 +78,6 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             if bgemm == "direct":
@@ -114,6 +111,8 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride,
     check_device(devices)
 
 
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
 def test_conv2d_nhwc_winograd_direct():
     """Test the conv2d with winograd for nhwc layout"""
     # resnet 18 workloads
@@ -135,13 +134,11 @@ def test_conv2d_nhwc_winograd_direct():
     verify_conv2d_nhwc(2,  48, 56,  48, 3, 1, "SAME", add_relu=True, add_bias=True)
     verify_conv2d_nhwc(1, 48, 35,  48, 5, 1, "VALID")
 
+
+@tvm.testing.requires_cuda
+@tvm.testing.requires_tensorcore
 def test_conv2d_nhwc_winograd_tensorcore():
     """Test the conv2d with winograd for nhwc layout"""
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-    if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
-        return
     verify_conv2d_nhwc(8,  64, 56,  64, 3, 1, 1, bgemm="tensorcore")
     verify_conv2d_nhwc(8, 128, 28, 128, 3, 1, 1, bgemm="tensorcore")
     verify_conv2d_nhwc(8, 256, 14, 256, 3, 1, 1, bgemm="tensorcore")
index 6c43b2d..8c30f44 100644 (file)
@@ -23,7 +23,7 @@ import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
+import tvm.testing
 
 
 _conv2d_transpose_nchw_implement = {
@@ -57,11 +57,7 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
 
     a_np, w_np, b_np, c_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_transpose_nchw_implement)
@@ -83,10 +79,11 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
         func2(a, w, c)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
         tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_transpose_nchw():
     verify_conv2d_transpose_nchw(1, 3, (224, 224),  1, (1, 1), (1, 1), (0, 0, 0, 0), (0, 0))
     verify_conv2d_transpose_nchw(1, 3, (224, 224),  32, (3, 3), (1, 1), (0, 0, 0, 0), (0, 0))
index 800aaea..674590a 100644 (file)
@@ -26,6 +26,7 @@ import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.nn.util import get_pad_tuple
 from tvm.topi.util import get_const_tuple
+import tvm.testing
 
 
 _conv2d_nchw_winograd_implement = {
@@ -70,7 +71,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -102,6 +103,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_nchw():
     # inception v3 workloads
     verify_conv2d_nchw(1, 128, 17, 192, 7, 1, 3, devices=['cuda'])
index ad2b93c..319fb72 100644 (file)
@@ -21,13 +21,12 @@ import tvm
 from tvm import te
 from tvm import autotvm
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.nn.util import get_pad_tuple3d
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
-
 _conv3d_ncdhw_implement = {
     "generic": (topi.nn.conv3d_ncdhw, topi.generic.schedule_conv3d_ncdhw),
     "cpu": (topi.x86.conv3d_ncdhw, topi.x86.schedule_conv3d_ncdhw),
@@ -66,11 +65,7 @@ def verify_conv3d_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride,
 
     a_np, w_np, b_np, c_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ncdhw_implement)
         with tvm.target.create(device):
@@ -94,10 +89,11 @@ def verify_conv3d_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride,
             func(a, w, c)
         tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)
 
-    for device in get_all_backend():
+    for device, ctx in tvm.testing.enabled_targets():
         with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
-            check_device(device)
+            check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_conv3d_ncdhw():
     #3DCNN  workloads
     verify_conv3d_ncdhw(1, 32, 32, 5, 1, 1, 0)
index b80f96b..7e330e7 100644 (file)
@@ -24,7 +24,6 @@ import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
 
 _conv3d_ndhwc_implement = {
     "generic": (topi.nn.conv3d_ndhwc, topi.generic.schedule_conv3d_ndhwc),
@@ -58,11 +57,7 @@ def verify_conv3d_ndhwc(batch, in_channel, in_size, num_filter, kernel, stride,
         return a_np, w_np, b_np
     a_np, w_np, b_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ndhwc_implement)
         with tvm.target.create(device):
@@ -76,10 +71,11 @@ def verify_conv3d_ndhwc(batch, in_channel, in_size, num_filter, kernel, stride,
         func(a, w, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_conv3d_ndhwc():
     verify_conv3d_ndhwc(1, 16, 32, 16, 3, 1, "SAME")
     verify_conv3d_ndhwc(4, 32, 16, 32, 5, 2, "SAME")
index 2adc348..9f92efa 100644 (file)
@@ -26,6 +26,7 @@ from tvm.contrib.pickle_memoize import memoize
 from tvm.contrib import nvcc
 from tvm.topi.nn.util import get_pad_tuple3d
 from tvm.topi.util import get_const_tuple
+import tvm.testing
 
 
 _conv3d_ndhwc_tensorcore_implement = {
@@ -71,12 +72,6 @@ def verify_conv3d_ndhwc(batch, in_channel, in_size, num_filter, kernel, stride,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        if not nvcc.have_tensorcore(ctx.compute_version):
-            print("skip because gpu does not support Tensor Cores")
-            return
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ndhwc_tensorcore_implement)
@@ -106,6 +101,8 @@ def verify_conv3d_ndhwc(batch, in_channel, in_size, num_filter, kernel, stride,
     check_device(devices)
 
 
+@tvm.testing.requires_tensorcore
+@tvm.testing.requires_cuda
 def test_conv3d_ndhwc_tensorcore():
     """Test the conv3d with tensorcore for ndhwc layout"""
     verify_conv3d_ndhwc(16, 16, 14, 16, 3, 1, 1)
index 8e98120..25d9b72 100644 (file)
@@ -19,12 +19,11 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
-
 
 _conv3d_transpose_ncdhw_implement = {
     "generic": (topi.nn.conv3d_transpose_ncdhw, topi.generic.schedule_conv3d_transpose_ncdhw),
@@ -55,11 +54,7 @@ def verify_conv3d_transpose_ncdhw(batch, in_channel, in_size, num_filter, kernel
 
     a_np, w_np, b_np, c_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_transpose_ncdhw_implement)
@@ -81,10 +76,11 @@ def verify_conv3d_transpose_ncdhw(batch, in_channel, in_size, num_filter, kernel
         func2(a, w, c)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, atol=1e-4, rtol=1e-4)
         tvm.testing.assert_allclose(c.asnumpy(), c_np, atol=1e-4, rtol=1e-4)
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_conv3d_transpose_ncdhw():
     verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 1,  (1, 1, 1), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
     verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 2, (3, 3, 3), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
index 6e26130..a6e528c 100644 (file)
@@ -21,12 +21,12 @@ import tvm
 from tvm import te
 from tvm import autotvm
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.nn.util import get_pad_tuple3d
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
 
 _conv3d_ncdhw_implement = {
     "gpu": (topi.cuda.conv3d_ncdhw_winograd, topi.cuda.schedule_conv3d_ncdhw_winograd),
@@ -78,7 +78,7 @@ def verify_conv3d_ncdhw(batch,
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -117,6 +117,7 @@ def verify_conv3d_ncdhw(batch,
             check_device(device)
 
 
+@tvm.testing.requires_gpu
 def test_conv3d_ncdhw():
     # Try without depth transformation
     #3DCNN  workloads
index f5eb51c..8106392 100644 (file)
@@ -24,9 +24,6 @@ import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
-
-
 _correlation_implement = {
     "generic": (topi.nn.correlation_nchw, topi.generic.schedule_correlation_nchw),
     "cuda": (topi.cuda.correlation_nchw, topi.cuda.schedule_correlation_nchw),
@@ -52,11 +49,7 @@ def verify_correlation_nchw(data_shape, kernel_size, max_displacement, stride1,
 
     a_np, b_np, c_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         fcompute, fschedule = tvm.topi.testing.dispatch(
             device, _correlation_implement)
@@ -72,10 +65,11 @@ def verify_correlation_nchw(data_shape, kernel_size, max_displacement, stride1,
             func(a, b, c)
             tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_correlation_nchw():
     verify_correlation_nchw((1, 3, 10, 10), kernel_size=1, max_displacement=4,
                         stride1=1, stride2=1, pad_size=4, is_multiply=True)
index a2a01fc..5d361b4 100644 (file)
@@ -23,7 +23,7 @@ import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
+import tvm.testing
 
 
 _deformable_conv2d_implement = {
@@ -62,7 +62,7 @@ def verify_deformable_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -85,6 +85,7 @@ def verify_deformable_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_deformable_conv2d_nchw():
     verify_deformable_conv2d_nchw(1, 16, 7, 16, 1, 1, 0, deformable_groups=4)
     verify_deformable_conv2d_nchw(1, 16, 7, 16, 3, 1, 1, dilation=2, deformable_groups=4)
index 517cb4d..e6530e7 100644 (file)
@@ -23,7 +23,8 @@ import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
 from tvm.contrib.pickle_memoize import memoize
 
-from common import get_all_backend, Int8Fallback
+from common import Int8Fallback
+import tvm.testing
 
 _dense_implement = {
     "generic": [(topi.nn.dense, topi.generic.schedule_dense)],
@@ -57,11 +58,7 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
     # get the test data
     a_np, b_np, c_np, d_np = get_ref_data()
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         for fcompute, fschedule in tvm.topi.testing.dispatch(device, _dense_implement):
             with tvm.target.create(device):
@@ -76,8 +73,8 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
             f(a, b, c, d)
             tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_dense_int8(batch, in_dim, out_dim, use_bias=True):
@@ -104,9 +101,6 @@ def verify_dense_int8(batch, in_dim, out_dim, use_bias=True):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
         if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
             print("Skip because int8 intrinsics are not available")
             return
@@ -128,6 +122,7 @@ def verify_dense_int8(batch, in_dim, out_dim, use_bias=True):
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_dense():
     verify_dense(1, 1024, 1000, use_bias=True)
     verify_dense(1, 1024, 1000, use_bias=False)
@@ -136,6 +131,8 @@ def test_dense():
     verify_dense(128, 1024, 1000, use_bias=True)
 
 
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
 def test_dense_int8():
     with Int8Fallback():
         verify_dense_int8(2, 1024, 1000, use_bias=True)
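
Where a hardware capability probe is still needed (here, int8 intrinsics via tvm.contrib.nvcc.have_int8), it stays inside the helper while the enablement gating moves to decorators, as in test_dense_int8 above. This is an illustrative sketch; _check_int8 and test_int8_workload are hypothetical names.

    import tvm
    import tvm.testing
    from tvm.contrib import nvcc

    def _check_int8(device):
        ctx = tvm.context(device, 0)
        if device == "cuda" and not nvcc.have_int8(ctx.compute_version):
            print("Skip because int8 intrinsics are not available")
            return
        print("Running on target: %s" % device)
        # ... build the int8 dense schedule, run on ctx, compare with the reference ...

    @tvm.testing.requires_cuda
    @tvm.testing.requires_gpu
    def test_int8_workload():
        _check_int8("cuda")
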
index 8a645e6..642d124 100644 (file)
@@ -23,7 +23,7 @@ import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
 from tvm import te
 from tvm.contrib.pickle_memoize import memoize
-from tvm.contrib import nvcc
+import tvm.testing
 
 
 _dense_implement = {
@@ -53,12 +53,6 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        if not nvcc.have_tensorcore(ctx.compute_version):
-            print("skip because gpu does not support Tensor Cores")
-            return
         print("Running on target: %s" % device)
         for fcompute, fschedule in tvm.topi.testing.dispatch(device, _dense_implement):
             with tvm.target.create(device):
@@ -74,10 +68,10 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
             tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-3)
 
 
-    for device in ['cuda']:
-        check_device(device)
+    check_device('cuda')
 
 
+@tvm.testing.requires_tensorcore
 def test_dense_tensorcore():
     """Test cases"""
     verify_dense(8, 16, 32, use_bias=True)
index 380f656..c949812 100644 (file)
@@ -19,10 +19,9 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 
-from common import get_all_backend
-
 
 def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width, layout='NCHW', mode='DCR'):
     out_channel = int(in_channel / (block_size * block_size))
@@ -50,11 +49,7 @@ def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width, la
         a_np = np.transpose(a_np, axes=[0, 2, 3, 1])
         b_np = np.transpose(b_np, axes=[0, 2, 3, 1])
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -64,10 +59,11 @@ def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width, la
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_depth_to_space():
     for layout in ['NCHW', 'NHWC']:
         for mode in ['DCR', 'CDR']:
index 5497e11..bc804cb 100644 (file)
@@ -24,7 +24,7 @@ from tvm.topi.util import get_const_tuple
 from tvm.topi.nn.util import get_pad_tuple
 from tvm.contrib.pickle_memoize import memoize
 
-from common import get_all_backend
+import tvm.testing
 
 _depthwise_conv2d_nchw_implement = {
     "generic": [(topi.nn.depthwise_conv2d_nchw, topi.generic.schedule_depthwise_conv2d_nchw)],
@@ -67,11 +67,7 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu
 
     dtype = 'float32'
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
 
         impl_list = tvm.topi.testing.dispatch(device, _depthwise_conv2d_nchw_implement)[:]
@@ -143,9 +139,9 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu
             tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
             tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
 
-    for device in get_all_backend():
+    for device, ctx in tvm.testing.enabled_targets():
         with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
-            check_device(device)
+            check_device(device, ctx)
 
 
 def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding, dilation=1):
@@ -170,11 +166,7 @@ def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_mu
 
     dtype = 'float32'
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
 
         fcompute, fschedule = tvm.topi.testing.dispatch(device, _depthwise_conv2d_nhwc_implement)
@@ -243,9 +235,9 @@ def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_mu
         tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
         tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
 
-    for device in get_all_backend():
+    for device, ctx in tvm.testing.enabled_targets():
         with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
-            check_device(device)
+            check_device(device, ctx)
 
 def _transform_data(data, bn):
     # NCHW -> NCHW[x]c
@@ -298,7 +290,7 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -360,6 +352,7 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m
             check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_depthwise_conv2d():
     # mobilenet workloads
     depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, 1, "SAME")
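
The autotvm-aware tests above keep their tophub context per target; a small hedged helper showing that loop shape (run_one is a hypothetical callback taking device and ctx):

    import tvm
    import tvm.testing
    from tvm import autotvm

    def run_on_all_enabled_targets(run_one):
        for device, ctx in tvm.testing.enabled_targets():
            with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
                run_one(device, ctx)

Used as run_on_all_enabled_targets(check_device), this matches the loop that replaced the old get_all_backend() iteration in the depthwise workloads.
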
index ba8bfcc..25ef6f1 100644 (file)
@@ -24,6 +24,7 @@ from tvm.topi.util import get_const_tuple
 from tvm.topi.nn.util import get_pad_tuple
 import tvm.topi.testing
 from tvm.topi.cuda.depthwise_conv2d import schedule_depthwise_conv2d_backward_input_nhwc
+import tvm.testing
 
 
 def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multiplier, filter_h, stride_h, padding_h):
@@ -51,7 +52,7 @@ def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multipli
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -106,6 +107,7 @@ def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multipli
     check_device("vulkan")
     check_device("nvptx")
 
+@tvm.testing.requires_gpu
 def test_topi_depthwise_conv2d_backward_input_nhwc():
     verify_depthwise_conv2d_back_input(16, 256, 56, 1, 3, 1, 1)
     verify_depthwise_conv2d_back_input(16, 256, 56, 2, 3, 1, 1)
index 599225d..5ebc56d 100644 (file)
@@ -24,6 +24,7 @@ from scipy import signal
 from tvm.topi.util import get_const_tuple
 from tvm.topi.nn.util import get_pad_tuple
 from tvm.topi.cuda.depthwise_conv2d import schedule_depthwise_conv2d_backward_weight_nhwc
+import tvm.testing
 
 
 def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multiplier, filter_h, stride_h, padding_h):
@@ -51,7 +52,7 @@ def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multipl
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -99,6 +100,7 @@ def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multipl
     check_device("vulkan")
     check_device("nvptx")
 
+@tvm.testing.requires_gpu
 def test_topi_depthwise_conv2d_backward_weight_nhwc():
     verify_depthwise_conv2d_back_weight(16, 256, 56, 1, 3, 1, 1)
     verify_depthwise_conv2d_back_weight(16, 256, 56, 2, 3, 1, 1)
index 6050d45..2eea4b0 100644 (file)
@@ -26,7 +26,8 @@ import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend, Int8Fallback
+from common import Int8Fallback
+import tvm.testing
 
 
 _group_conv2d_nchw_implement = {
@@ -71,7 +72,7 @@ def verify_group_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, str
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
 
@@ -148,7 +149,7 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kerne
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
@@ -182,6 +183,7 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kerne
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_group_conv2d_nchw():
     # ResNeXt-50 workload
     verify_group_conv2d_nchw(1, 128, 56, 128, 3, 1, 1, 1, 32)
@@ -207,6 +209,7 @@ def test_group_conv2d_nchw():
 
 
 
+@tvm.testing.requires_cuda
 def test_group_conv2d_NCHWc_int8():
     with Int8Fallback():
         # ResNeXt-50 workload
index 6afe44e..c5eebf4 100644 (file)
@@ -27,8 +27,6 @@ from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 import pytest
 
-from common import get_all_backend
-
 def _transform_data(data, bn):
     # NCHW -> NCHW[x]c
     batch_size, channel, height, width = data.shape
@@ -77,7 +75,7 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, groups, in_size, num_filte
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -105,6 +103,7 @@ def verify_group_conv2d_NCHWc_int8(batch, in_channel, groups, in_size, num_filte
             check_device(device)
     autotvm.GLOBAL_SCOPE.silent = False
 
+@tvm.testing.uses_gpu
 @pytest.mark.skip
 def test_conv2d_NCHWc():
     # ResNet50 workloads
index 7fce69d..2fafe6c 100644 (file)
@@ -22,7 +22,6 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 
-from common import get_all_backend
 
 def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width,
                   layout='NCHW', coord_trans="align_corners", method="bilinear"):
@@ -47,11 +46,7 @@ def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width,
         scale_w = out_width / in_width
         b_np = tvm.topi.testing.upsampling_python(a_np, (scale_h, scale_w), layout)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -62,10 +57,11 @@ def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width,
 
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_resize():
     # Scale NCHW
     verify_resize(4, 16, 32, 32, 50, 50, 'NCHW')
@@ -114,11 +110,7 @@ def verify_resize3d(batch, in_channel, in_depth, in_height, in_width, out_depth,
         scale_w = out_width / in_width
         b_np = tvm.topi.testing.upsampling3d_python(a_np, (scale_d, scale_h, scale_w), layout)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -129,10 +121,11 @@ def verify_resize3d(batch, in_channel, in_depth, in_height, in_width, out_depth,
 
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_resize3d():
     # Trilinear
     verify_resize3d(4, 8, 16, 16, 16, 25, 25, 25, 'NCDHW')
@@ -147,6 +140,7 @@ def test_resize3d():
     verify_resize3d(4, 8, 16, 16, 16, 25, 25, 25, 'NDHWC', method="nearest_neighbor")
 
 
+@tvm.testing.uses_gpu
 def test_crop_and_resize():
     def verify_crop_and_resize(image_shape, np_boxes, np_box_indices, np_crop_size, layout='NHWC',
                                method="bilinear", extrapolation_value=0.0):
@@ -174,11 +168,7 @@ def test_crop_and_resize():
         baseline_np = tvm.topi.testing.crop_and_resize_python(np_images, np_boxes, np_box_indices,
                                                           np_crop_size, layout, method,
                                                           extrapolation_value)
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(out)
@@ -191,8 +181,8 @@ def test_crop_and_resize():
 
             tvm.testing.assert_allclose(tvm_out.asnumpy(), baseline_np, rtol=1e-3, atol=1e-3)
 
-        for device in get_all_backend():
-            check_device(device)
+        for device, ctx in tvm.testing.enabled_targets():
+            check_device(device, ctx)
 
     boxes_1 = np.array([[.2, .3, .7, .9]], dtype="float32")
     boxes_2 = np.array([[.2, .3, .7, .9], [0, .1, .8, 1]], dtype="float32")
@@ -209,6 +199,7 @@ def test_crop_and_resize():
     verify_crop_and_resize((1, 3, 224, 224), boxes_1, indices_1, size_1, layout="NCHW")
 
 
+@tvm.testing.uses_gpu
 def test_affine_grid():
     def verify_affine_grid(num_batch, target_shape):
         dtype = "float32"
@@ -224,11 +215,7 @@ def test_affine_grid():
 
         data_np, out_np = get_ref_data()
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(out)
@@ -240,13 +227,14 @@ def test_affine_grid():
             tvm.testing.assert_allclose(
                 tvm_out.asnumpy(), out_np, rtol=1e-5, atol=1e-5)
 
-        for device in get_all_backend():
-            check_device(device)
+        for device, ctx in tvm.testing.enabled_targets():
+            check_device(device, ctx)
 
     verify_affine_grid(1, (16, 32))
     verify_affine_grid(4, (16, 32))
 
 
+@tvm.testing.uses_gpu
 def test_grid_sample():
     def verify_grid_sample(data_shape, grid_shape):
         dtype = "float32"
@@ -264,11 +252,7 @@ def test_grid_sample():
 
         data_np, grid_np, out_np = get_ref_data()
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(out)
@@ -281,8 +265,8 @@ def test_grid_sample():
             tvm.testing.assert_allclose(
                 tvm_out.asnumpy(), out_np, rtol=1e-5, atol=1e-5)
 
-        for device in get_all_backend():
-            check_device(device)
+        for device, ctx in tvm.testing.enabled_targets():
+            check_device(device, ctx)
 
     verify_grid_sample((4, 4, 16, 32), (4, 2, 8, 8))
     verify_grid_sample((4, 4, 16, 32), (4, 2, 32, 32))
index 2d57d07..13dcc71 100644 (file)
@@ -21,6 +21,7 @@ from tvm import te
 from tvm import topi
 from tvm.topi.util import get_const_tuple
 import tvm.topi.testing
+import tvm.testing
 
 _lrn_schedule = {
     "generic": topi.generic.schedule_lrn,
@@ -41,7 +42,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta):
     b_np = tvm.topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
 
     def check_device(device):
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -58,6 +59,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta):
     for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
         check_device(device)
 
+@tvm.testing.uses_gpu
 def test_lrn():
     verify_lrn((1, 3, 5, 5), 3, 1, 1.0, 1.0, 0.5)
     verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5)
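
test_lrn above keeps its own hard-coded device list instead of enabled_targets(); in that shape each device is guarded with tvm.testing.device_enabled before use. A hedged sketch of that variant (the workload body is elided):

    import tvm
    import tvm.testing

    @tvm.testing.uses_gpu
    def test_explicit_device_list():
        for device in ["llvm", "cuda", "opencl", "metal", "rocm", "vulkan", "nvptx"]:
            if not tvm.testing.device_enabled(device):
                print("Skip because %s is not enabled" % device)
                continue
            print("Running on target: %s" % device)
            # ... build and run the workload for this device ...
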
index 8a9754e..9a7bc6e 100644 (file)
@@ -22,7 +22,6 @@ from tvm import te
 from tvm import topi
 import tvm.topi.testing
 from tvm.topi import util
-from common import get_all_backend
 
 
 def test_util():
@@ -31,6 +30,7 @@ def test_util():
     assert util.get_const_tuple((x, x)) == (100, 100)
 
 
+@tvm.testing.uses_gpu
 def test_ewise():
     def test_apply(
         func,
@@ -57,11 +57,7 @@ def test_ewise():
             a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-4
         b_np = f_numpy(a_np)
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -71,8 +67,8 @@ def test_ewise():
             foo(a, b)
             tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
 
-        for target in get_all_backend():
-            check_device(target)
+        for target, ctx in tvm.testing.enabled_targets():
+            check_device(target, ctx)
 
     def test_isnan(
         low,
@@ -97,11 +93,7 @@ def test_ewise():
             a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-5
         b_np = np.isnan(a_np)
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -111,8 +103,8 @@ def test_ewise():
             foo(a, b)
             tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
 
-        for target in get_all_backend():
-            check_device(target)
+        for target, ctx in tvm.testing.enabled_targets():
+            check_device(target, ctx)
 
     def test_infiniteness_ops(topi_op, ref_op, name):
         for dtype in ['float32', 'float64', 'int32', 'int16']:
@@ -128,11 +120,7 @@ def test_ewise():
                 a_np.ravel()[np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False)] = np.nan
             b_np = ref_op(a_np)
 
-            def check_device(device):
-                ctx = tvm.context(device, 0)
-                if not ctx.exist:
-                    print("Skip because %s is not enabled" % device)
-                    return
+            def check_device(device, ctx):
                 with tvm.target.create(device):
                     s = tvm.topi.testing.get_injective_schedule(device)(B)
                 foo = tvm.build(s, [A, B], device, name=name)
@@ -141,8 +129,8 @@ def test_ewise():
                 foo(a, b)
                 tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
 
-            for target in get_all_backend():
-                check_device(target)
+            for target, ctx in tvm.testing.enabled_targets():
+                check_device(target, ctx)
 
     test_apply(topi.floor, "floor", np.floor, -100, 100)
     test_apply(topi.ceil, "ceil", np.ceil, -100, 100)
@@ -167,6 +155,7 @@ def test_ewise():
     test_infiniteness_ops(topi.isinf, np.isinf, 'isinf')
 
 
+@tvm.testing.uses_gpu
 def test_cast():
     def verify(from_dtype, to_dtype, low=-100, high=100):
         shape = (5, 4)
@@ -181,11 +170,7 @@ def test_cast():
             a_np = a_np - a_np[2, 3]
         b_np = a_np.astype(to_dtype)
 
-        for device in get_all_backend():
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                continue
+        for device, ctx in tvm.testing.enabled_targets():
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -223,7 +208,7 @@ def test_fastmath():
 
         def check_device(device):
             ctx = tvm.context(device, 0)
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("Skip because %s is not enabled" % device)
                 return
             with tvm.target.create(device):
index b24dd85..2f3a38c 100644 (file)
@@ -21,9 +21,9 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
-from common import get_all_backend
 
 _pool_schedule = {
     "generic": topi.generic.schedule_pool,
@@ -91,11 +92,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_
                 b_np[:, :, i, j] = np.max(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2, 3))
     b_np = np.maximum(b_np, 0.0)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s_func = tvm.topi.testing.dispatch(device, _pool_schedule)
@@ -107,8 +104,8 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=2e-5, atol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True,
                      add_relu=False):
@@ -147,11 +144,7 @@ def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_inc
     if add_relu:
         pool_grad_np = np.maximum(pool_grad_np, 0.)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s_func = tvm.topi.testing.dispatch(device, _pool_grad_schedule)
@@ -164,9 +157,10 @@ def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_inc
         f(a, out_grad, pool_grad)
         tvm.testing.assert_allclose(pool_grad.asnumpy(), pool_grad_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_pool():
     """test cases of pool"""
     verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'avg', False, True)
@@ -183,6 +177,7 @@ def test_pool():
     verify_pool(1, 256, 31, 3, 3, [1, 0, 3, 2], 'max', False)
     verify_pool(1, 256, 31, 3, 3, [3, 2, 1, 0], 'max', True)
 
+@tvm.testing.uses_gpu
 def test_pool_grad():
     """test cases of pool_grad"""
     verify_pool_grad(1, 256, 32, 3, 2, [1, 1, 1, 1], 'avg', False, False)
@@ -222,11 +217,7 @@ def verify_global_pool(dshape, pool_type, layout='NCHW'):
         b_np = np.max(a_np, axis=axis, keepdims=True)
     b_np = np.maximum(b_np, 0.0)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s_func = tvm.topi.testing.dispatch(device, _adaptive_pool_schedule)
@@ -240,9 +231,10 @@ def verify_global_pool(dshape, pool_type, layout='NCHW'):
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_global_pool():
     """test cases of global_pool"""
     verify_global_pool((1, 1024, 7, 7), 'avg')
@@ -268,11 +260,7 @@ def verify_adaptive_pool(dshape, out_size, pool_type, layout="NCHW", dtype="floa
         assert len(out_size) == 3
         out = topi.nn.adaptive_pool3d(data, out_size, pool_type, layout)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s_func = tvm.topi.testing.dispatch(device, _adaptive_pool_schedule)
@@ -286,10 +274,11 @@ def verify_adaptive_pool(dshape, out_size, pool_type, layout="NCHW", dtype="floa
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), np_out, rtol=4e-5, atol=1e-6)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_adaptive_pool():
     """test cases of adaptive_pool"""
     verify_adaptive_pool((1, 3, 224, 224), (1, 1), "max")
@@ -329,11 +318,7 @@ def verify_pool3d(n, ic, ih, kh, sh, padding, pool_type,
     ref_np = tvm.topi.testing.pool3d_ncdhw_python(input_np, kernel, stride, padding,
                                               output_shape, pool_type, count_include_pad, ceil_mode)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s_func = tvm.topi.testing.dispatch(device, _pool_schedule)
@@ -345,10 +330,11 @@ def verify_pool3d(n, ic, ih, kh, sh, padding, pool_type,
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), ref_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_pool3d():
     """test cases of pool3d"""
     verify_pool3d(1, 256, 32, 2, 2, [0, 0, 0, 0, 0, 0], 'avg', False, True)
@@ -384,11 +370,7 @@ def verify_pool1d(n, ic, iw, kw, sw, padding, pool_type,
     ref_np = tvm.topi.testing.pool1d_ncw_python(input_np, kernel, stride, padding,
                                             output_shape, pool_type, count_include_pad, ceil_mode)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s_func = tvm.topi.testing.dispatch(device, _pool_schedule)
@@ -400,10 +382,11 @@ def verify_pool1d(n, ic, iw, kw, sw, padding, pool_type,
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), ref_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_pool1d():
     """test cases of pool1d"""
     verify_pool1d(1, 256, 32, 2, 2, [0, 0], 'avg', False, True)
index d84182f..33706e6 100644 (file)
@@ -22,7 +22,6 @@ from tvm import te
 from tvm import topi
 import tvm.topi.testing
 
-from common import get_all_backend
 
 def _my_npy_argmax(arr, axis, keepdims):
     if not keepdims:
@@ -69,11 +68,7 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32")
     else:
         raise NotImplementedError
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_reduce_schedule(device)(B)
@@ -122,10 +117,11 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32")
                 tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
         else:
             tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_reduce_map():
 
     verify_reduce_map_ele(in_shape=(32,),
index 1114b3f..7442538 100644 (file)
@@ -24,7 +24,7 @@ import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
 from tvm.contrib.nvcc import have_fp16
 
-from common import get_all_backend
+import tvm.testing
 
 def verify_relu(m, n, dtype="float32"):
     A = te.placeholder((m, n), name='A', dtype=dtype)
@@ -33,11 +33,7 @@ def verify_relu(m, n, dtype="float32"):
     a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
     b_np = a_np * (a_np > 0)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         if dtype == "float16" and device == "cuda" and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because %s does not have fp16 support" % device)
             return
@@ -51,8 +47,8 @@ def verify_relu(m, n, dtype="float32"):
         foo(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_leaky_relu(m, alpha):
@@ -92,10 +88,12 @@ def verify_prelu(x, w, axis, weight_reshape):
     out_np = _prelu_numpy(x_np, w_np)
     tvm.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_relu():
     verify_relu(10, 128, "float32")
     verify_relu(128, 64, "float16")
 
+@tvm.testing.uses_gpu
 def test_schedule_big_array():
     verify_relu(1024 * 100 , 512)
 
index e5a1947..2b49461 100644 (file)
@@ -21,6 +21,7 @@ from tvm.topi.util import get_const_tuple
 import tvm
 from tvm import te
 import tvm.topi.testing
+import tvm.testing
 
 _reorg_schedule = {
     "generic": topi.generic.schedule_reorg,
@@ -47,7 +48,7 @@ def verify_reorg(batch, in_size, in_channel, stride):
     def check_device(device):
         '''Checking whether the device is enabled or not'''
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -63,6 +64,7 @@ def verify_reorg(batch, in_size, in_channel, stride):
     for device in ['llvm', 'cuda']:
         check_device(device)
 
+@tvm.testing.uses_gpu
 def test_reorg():
     verify_reorg(1, 20, 8, 2)
 
index 1ff69be..46322ba 100644 (file)
@@ -20,11 +20,11 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import topi
+import tvm.testing
 import tvm.topi.testing
 import logging
 from tvm.topi.util import get_const_tuple
 
-from common import get_all_backend
 
 _softmax_schedule = {
     "generic": topi.generic.schedule_softmax,
@@ -33,11 +33,7 @@ _softmax_schedule = {
     "hls": topi.hls.schedule_softmax,
 }
 
-def check_device(A, B, a_np, b_np, device, name):
-    ctx = tvm.context(device, 0)
-    if not ctx.exist:
-        print("Skip because %s is not enabled" % device)
-        return
+def check_device(A, B, a_np, b_np, device, ctx, name):
     print("Running on target: %s" % device)
     with tvm.target.create(device):
         s_func = tvm.topi.testing.dispatch(device, _softmax_schedule)
@@ -59,8 +55,8 @@ def verify_softmax(m, n, dtype="float32"):
     a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
     b_np = tvm.topi.testing.softmax_python(a_np)
 
-    for device in get_all_backend():
-        check_device(A, B, a_np, b_np, device, "softmax")
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(A, B, a_np, b_np, device, ctx, "softmax")
 
 def verify_softmax_4d(shape, dtype="float32"):
     A = te.placeholder(shape, dtype=dtype, name='A')
@@ -71,9 +67,10 @@ def verify_softmax_4d(shape, dtype="float32"):
     b_np = tvm.topi.testing.softmax_python(a_np.transpose(0, 2, 3, 1).reshape(h*w, c))
     b_np = b_np.reshape(1, h, w, c).transpose(0, 3, 1, 2)
 
-    for device in get_all_backend():
-        check_device(A, B, a_np, b_np, device, "softmax")
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(A, B, a_np, b_np, device, ctx, "softmax")
 
+@tvm.testing.uses_gpu
 def test_softmax():
     verify_softmax(32, 10)
     verify_softmax(3, 4)
@@ -89,10 +86,11 @@ def verify_log_softmax(m, n, dtype="float32"):
     a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
     b_np = tvm.topi.testing.log_softmax_python(a_np)
 
-    for device in get_all_backend():
-        check_device(A, B, a_np, b_np, device, "log_softmax")
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(A, B, a_np, b_np, device, ctx, "log_softmax")
 
 
+@tvm.testing.uses_gpu
 def test_log_softmax():
     verify_log_softmax(32, 10)
     verify_log_softmax(3, 4)
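
The softmax tests above share one module-level check_device across several verify_* wrappers; after the change its signature simply grows a ctx parameter threaded from enabled_targets(). A slim hedged sketch of that shape (bodies elided, names illustrative):

    import tvm
    import tvm.testing

    def check_device(A, B, a_np, b_np, device, ctx, name):
        print("Running on target: %s" % device)
        # ... dispatch the schedule for `device`, build `name`, run on ctx, compare ...

    def verify_workload(A, B, a_np, b_np):
        for device, ctx in tvm.testing.enabled_targets():
            check_device(A, B, a_np, b_np, device, ctx, "softmax")
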
index 7abfe58..603d2ef 100644 (file)
@@ -21,6 +21,7 @@ import tvm
 from tvm import te
 from tvm import topi
 import tvm.topi.testing
+import tvm.testing
 
 _argsort_implement = {
     "generic": (topi.argsort, topi.generic.schedule_argsort),
@@ -52,10 +53,10 @@ def verify_argsort(axis, is_ascend):
         np_indices = np_indices[:, :dshape[axis]]
 
     def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
+        ctx = tvm.context(device, 0)
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _argsort_implement)
@@ -97,7 +98,7 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -124,6 +125,7 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype):
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_argsort():
     np.random.seed(0)
     for axis in [0, -1, 1]:
@@ -131,6 +133,7 @@ def test_argsort():
         verify_argsort(axis, False)
 
 
+@tvm.testing.uses_gpu
 def test_topk():
     np.random.seed(0)
     for k in [0, 1, 5]:
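
verify_argsort above also reorders the body so the guard comes first and the context handle is only created for devices that pass it; a hedged sketch of that ordering (_check is a hypothetical helper):

    import tvm
    import tvm.testing

    def _check(device):
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.context(device, 0)
        print("Running on target: %s" % device)
        # ... build, upload the inputs to ctx, run and compare ...
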
index f659c33..5096785 100644 (file)
@@ -21,8 +21,6 @@ from tvm import te
 from tvm import topi
 import tvm.topi.testing
 
-from common import get_all_backend
-
 
 def verify_space_to_depth(block_size, batch, in_channel, in_height, in_width, layout='NCHW'):
     out_channel = int(in_channel * (block_size * block_size))
@@ -50,11 +48,7 @@ def verify_space_to_depth(block_size, batch, in_channel, in_height, in_width, la
         a_np = np.transpose(a_np, axes=[0, 2, 3, 1])
         b_np = np.transpose(b_np, axes=[0, 2, 3, 1])
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -64,10 +58,11 @@ def verify_space_to_depth(block_size, batch, in_channel, in_height, in_width, la
         f(a, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_space_to_depth():
     for layout in ['NCHW', 'NHWC']:
         # Simplest possible case
index e5fd0e9..f0e701b 100644 (file)
@@ -25,6 +25,7 @@ import tvm.contrib.sparse as tvmsp
 from collections import namedtuple
 import time
 import scipy.sparse as sp
+import tvm.testing
 
 _sparse_dense_implement = {
     "generic": (topi.nn.sparse_dense, topi.generic.schedule_sparse_dense),
@@ -56,7 +57,7 @@ def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -100,7 +101,7 @@ def verify_dynamic_csrmm(batch, in_dim, out_dim, use_bias=True):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -141,7 +142,7 @@ def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -178,7 +179,7 @@ def verify_dense_sw(batch, in_dim, out_dim, use_bias=True, dtype='float32'):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -303,7 +304,7 @@ def verify_sparse_dense_bsr(M, N, K, BS_R, BS_C, density, use_relu):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -325,11 +326,13 @@ def verify_sparse_dense_bsr(M, N, K, BS_R, BS_C, density, use_relu):
     for device in ['llvm', 'cuda']:
         check_device(device)
 
+@tvm.testing.uses_gpu
 def test_sparse_dense_bsr():
     M, N, K, BS_R, BS_C, density = 1, 64, 128, 8, 16, 0.9
     verify_sparse_dense_bsr(M, N, K, BS_R, BS_C, density, use_relu=True)
     verify_sparse_dense_bsr(M, N, K, BS_R, BS_C, density, use_relu=False)
 
+@tvm.testing.uses_gpu
 def test_sparse_dense_bsr_randomized():
     for _ in range(20):
         BS_R = np.random.randint(1, 16)
@@ -351,7 +354,7 @@ def test_sparse_dense_bsr_randomized():
 
         def check_device(device):
             ctx = tvm.context(device, 0)
-            if not ctx.exist:
+            if not tvm.testing.device_enabled(device):
                 print("Skip because %s is not enabled" % device)
                 return
             print("Running on target: %s" % device)
@@ -372,14 +375,11 @@ def test_sparse_dense_bsr_randomized():
             check_device(device)
 
 
-def test_sparse_dense():
-    test_sparse_dense_csr()
-    test_sparse_dense_bsr()
-    test_sparse_dense_bsr_randomized()
-
 if __name__ == "__main__":
     test_csrmv()
     test_csrmm()
     test_dense()
-    test_sparse_dense()
+    test_sparse_dense_csr()
+    test_sparse_dense_bsr()
+    test_sparse_dense_bsr_randomized()
     test_sparse_transpose_csr()
index 3444284..53e48bf 100644 (file)
@@ -22,6 +22,7 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.contrib.nvcc import have_fp16
+import tvm.testing
 
 def verify_elemwise_sum(num_args, dtype):
     shape = (3,5,4)
@@ -41,7 +42,7 @@ def verify_elemwise_sum(num_args, dtype):
     np_nd = get_ref_data()
 
     def check_device(device):
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
 
@@ -70,7 +71,7 @@ def verify_full(shape, dtype, fill_value):
     np_nd = get_ref_data()
 
     def check_device(device):
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
 
@@ -89,7 +90,7 @@ def verify_full(shape, dtype, fill_value):
 
 def verify_vectorization(n, m, dtype):
     def check_device(device):
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         if dtype == "float16" and device == "cuda" and not have_fp16(tvm.gpu(0).compute_version):
@@ -112,6 +113,8 @@ def verify_vectorization(n, m, dtype):
     for device in ["cuda"]:
         check_device(device)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_vectorization():
     verify_vectorization(128, 64, "float16")
 
index d8c51b8..12e50b4 100644 (file)
@@ -23,16 +23,12 @@ from tvm import topi
 import tvm.topi.testing
 from tvm.contrib.nvcc import have_fp16
 
-from common import get_all_backend
+import tvm.testing
 
 def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
     A = te.placeholder(shape=in_shape, name="A")
     B = topi.expand_dims(A, axis, num_newaxis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_broadcast_schedule(device)(B)
@@ -44,18 +40,14 @@ def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
         foo(data_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_reinterpret(in_shape, in_dtype, out_dtype, generator):
     A = te.placeholder(shape=in_shape, name="A", dtype=in_dtype)
     B = topi.reinterpret(A, out_dtype)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         if in_dtype == "float16" and device == 'cuda' and not have_fp16(ctx.compute_version):
             print("Skip because %s does not have fp16 support" % device)
             return
@@ -70,18 +62,14 @@ def verify_reinterpret(in_shape, in_dtype, out_dtype, generator):
         foo(data_nd, out_nd)
         np.testing.assert_equal(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_transpose(in_shape, axes):
     A = te.placeholder(shape=in_shape, name="A")
     B = topi.transpose(A, axes)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -93,18 +81,14 @@ def verify_transpose(in_shape, axes):
         foo(data_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_reshape(src_shape, dst_shape):
     A = te.placeholder(shape=src_shape, name="A")
     B = topi.reshape(A, dst_shape)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -116,18 +100,14 @@ def verify_reshape(src_shape, dst_shape):
         foo(data_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_squeeze(src_shape, axis):
     A = te.placeholder(shape=src_shape, name="A")
     B = topi.squeeze(A, axis=axis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -141,8 +121,8 @@ def verify_squeeze(src_shape, axis):
         foo(data_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_concatenate(shapes, axis):
 
@@ -162,11 +142,7 @@ def verify_concatenate(shapes, axis):
     for i, shape in enumerate(shapes):
         tensor_l.append(te.placeholder(shape, name="A" + str(i)))
     out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = get_concat_schedule(device)(out_tensor)
@@ -179,19 +155,15 @@ def verify_concatenate(shapes, axis):
         foo(*(data_nds + [out_nd]))
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_stack(shapes, axis):
     tensor_l = []
     for i, shape in enumerate(shapes):
         tensor_l.append(te.placeholder(shape, name="A" + str(i)))
     out_tensor = topi.stack(tensor_l, axis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_broadcast_schedule(device)(out_tensor)
@@ -204,18 +176,14 @@ def verify_stack(shapes, axis):
         foo(*(data_nds + [out_nd]))
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_split(src_shape, indices_or_sections, axis):
     A = te.placeholder(shape=src_shape, name="A")
     tensor_l = topi.split(A, indices_or_sections, axis=axis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(tensor_l)
@@ -229,8 +197,8 @@ def verify_split(src_shape, indices_or_sections, axis):
         for out_nd, out_npy in zip(out_nds, out_npys):
             tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_expand_like(in_shape, out_shape, axis):
@@ -240,9 +208,6 @@ def verify_expand_like(in_shape, out_shape, axis):
     s = te.create_schedule([C.op])
 
     def check_device(device):
-        if not tvm.runtime.enabled(device):
-            print("Skip because %s is not enabled" % device)
-            return
         print("Running on target: %s" % device)
 
         ctx = tvm.context(device, 0)
@@ -272,7 +237,7 @@ def verify_flip(in_shape, axis):
     B = topi.flip(A, axis) + 1
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -291,6 +256,7 @@ def verify_flip(in_shape, axis):
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_reverse_sequence():
     def verify_reverse_sequence(in_data, seq_lengths, batch_axis, seq_axis, ref_res):
         seq_lengths = np.array(seq_lengths).astype("int32")
@@ -298,11 +264,7 @@ def test_reverse_sequence():
         B = te.placeholder(shape=seq_lengths.shape, name="B", dtype=str(seq_lengths.dtype))
         C = topi.reverse_sequence(A, B, seq_axis, batch_axis)
 
-        def check_device(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
+        def check_device(device, ctx):
             print("Running on target: %s" % device)
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(C)
@@ -315,8 +277,8 @@ def test_reverse_sequence():
             foo(data_nd, seq_lengths_nd, out_nd)
             tvm.testing.assert_allclose(out_nd.asnumpy(), ref_res)
 
-        for device in get_all_backend():
-            check_device(device)
+        for device, ctx in tvm.testing.enabled_targets():
+            check_device(device, ctx)
 
     indata = np.array(np.arange(0, 16)).reshape([4, 4]).astype("int32")
     result = [[0, 5, 10, 15],
@@ -382,7 +344,7 @@ def verify_take(src_shape, indices_src, axis=None, mode="clip"):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -417,7 +379,7 @@ def verify_strided_slice(in_shape, begin, end, strides=None):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -449,7 +411,7 @@ def verify_strided_set(in_shape, v_shape, begin, end, strides=None):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -490,11 +452,7 @@ def verify_gather(data, axis, indices):
     var_indices = te.placeholder(shape=indices.shape, dtype=indices.dtype.name, name="indices")
     out_tensor = topi.gather(var_data, axis, var_indices)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(out_tensor)
@@ -508,8 +466,8 @@ def verify_gather(data, axis, indices):
         func(data_nd, indices_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_gather_nd(src_shape, indices_src, indices_dtype):
     src_dtype = "float32"
@@ -518,11 +476,7 @@ def verify_gather_nd(src_shape, indices_src, indices_dtype):
     indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
     out_tensor = topi.gather_nd(a=A, indices=indices)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(out_tensor)
@@ -540,8 +494,8 @@ def verify_gather_nd(src_shape, indices_src, indices_dtype):
         func(data_nd, indices_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_arange(start, stop, step):
     if start is None and step is None:
@@ -557,11 +511,7 @@ def verify_arange(start, stop, step):
         A = topi.arange(start, stop, step)
         a_np = np.arange(start, stop, step)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(A)
@@ -570,17 +520,13 @@ def verify_arange(start, stop, step):
         f(a_nd)
         tvm.testing.assert_allclose(a_nd.asnumpy(), a_np)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_repeat(in_shape, repeats, axis):
     A = te.placeholder(shape=in_shape, name="A")
     B = topi.repeat(A, repeats, axis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_broadcast_schedule(device)(B)
@@ -592,17 +538,13 @@ def verify_repeat(in_shape, repeats, axis):
         foo(data_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_tile(in_shape, reps):
     A = te.placeholder(shape=in_shape, name="A")
     B = topi.tile(A, reps)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_broadcast_schedule(device)(B)
@@ -614,8 +556,8 @@ def verify_tile(in_shape, reps):
         foo(data_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_where(in_shape):
     Cond = te.placeholder(shape=in_shape, name="cond")
@@ -623,11 +565,7 @@ def verify_where(in_shape):
     A = te.placeholder(shape=in_shape, name="A")
     B = te.placeholder(shape=in_shape, name="B")
     C = topi.where(Cond, A, B)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_broadcast_schedule(device)(C)
@@ -643,19 +581,15 @@ def verify_where(in_shape):
         f(cond_nd, x_nd, y_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_one_hot(indices_shape, depth, on_value, off_value, axis, dtype):
     indices = te.placeholder(shape=indices_shape, name="indices", dtype="int32")
     on_value_const = tvm.tir.const(on_value, dtype)
     off_value_const = tvm.tir.const(off_value, dtype)
     one_hot_result = topi.transform.one_hot(indices, on_value_const, off_value_const, depth, axis, dtype)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(one_hot_result)
@@ -668,8 +602,8 @@ def verify_one_hot(indices_shape, depth, on_value, off_value, axis, dtype):
         out_topi = out_nd.asnumpy()
         tvm.testing.assert_allclose(out_topi, out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 
 def verify_unravel_index(indices, shape, dtype):
@@ -684,11 +618,7 @@ def verify_unravel_index(indices, shape, dtype):
     Y = te.placeholder(shape=y_data.shape, dtype=dtype, name="Y")
     Z = topi.unravel_index(X, Y)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(Z)
@@ -701,8 +631,8 @@ def verify_unravel_index(indices, shape, dtype):
         foo(datax_nd, datay_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape, xpected):
     sparse_indices_data = np.array(sparse_indices)
@@ -720,11 +650,7 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
         args = [A, B, C]
         D = topi.sparse_to_dense(A, output_shape, B, C)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(D)
@@ -743,8 +669,8 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
 
         tvm.testing.assert_allclose(out_nd.asnumpy(), np.array(xpected))
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
 def verify_matrix_set_diag(input_shape, dtype):
     diagonal_shape = list(input_shape[:-2])
@@ -752,11 +678,8 @@ def verify_matrix_set_diag(input_shape, dtype):
     input = te.placeholder(shape=input_shape, name="input", dtype=dtype)
     diagonal = te.placeholder(shape=diagonal_shape, name="diagonal", dtype=dtype)
     matrix_set_diag_result = topi.transform.matrix_set_diag(input, diagonal)
-    def check_device(device):
+    def check_device(device, ctx):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(matrix_set_diag_result)
@@ -771,10 +694,11 @@ def verify_matrix_set_diag(input_shape, dtype):
         out_topi = out_nd.asnumpy()
         tvm.testing.assert_allclose(out_topi, out_npy)
 
-    for device in get_all_backend():
-        check_device(device)
+    for target, ctx in tvm.testing.enabled_targets():
+        check_device(target, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_strided_slice():
     verify_strided_slice((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2])
     verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1])
@@ -784,6 +708,7 @@ def test_strided_slice():
     verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3])
     verify_strided_slice((3, 4, 3), [0, 2, 0], [1, 2, 3])
 
+@tvm.testing.uses_gpu
 def test_strided_set():
     verify_strided_set((3, 4, 3), (3, 2, 2), [0, 3, 0], [4, 1, 4], [1, -1, 2])
     verify_strided_set((3, 4, 3), (3, 1, 2), [0, 0, 0], [4, -5, 4], [1, -1, 2])
@@ -795,11 +720,13 @@ def test_strided_set():
     verify_strided_set((3, 4, 3), (2, 3, 3), [1, 1, 0], [4, 4, 3])
     verify_strided_set((3, 4, 3), (2, 3, 3), [1, 1], [4, 4, 3])
 
+@tvm.testing.uses_gpu
 def test_expand_dims():
     verify_expand_dims((3, 10), (3, 10, 1, 1), 2, 2)
     verify_expand_dims((3, 10), (1, 3, 10), -3, 1)
 
 
+@tvm.testing.uses_gpu
 def test_reinterpret():
     verify_reinterpret((1000,), "float32", "int32",
                        lambda shape: np.random.randn(*shape) * 1000)
@@ -813,12 +740,14 @@ def test_reinterpret():
                        lambda shape: np.random.randint(0, 2 ** 32 - 1, size=shape))
 
 
+@tvm.testing.uses_gpu
 def test_transpose():
     verify_transpose((3, 10, 2), (1, 0, 2))
     verify_transpose((3, 10, 5), (2, 0, 1))
     verify_transpose((3, 10), None)
 
 
+@tvm.testing.uses_gpu
 def test_reshape():
     verify_reshape((1, 2, 3, 4), (2, 3, 4))
     verify_reshape((4, 2, 3, 4), (2, 4, 12))
@@ -827,10 +756,12 @@ def test_reshape():
     verify_reshape((4, 0), (2, 0, 2))
 
 
+@tvm.testing.uses_gpu
 def test_where():
     verify_where((1, 2, 3, 4))
 
 
+@tvm.testing.requires_gpu
 def test_squeeze():
     verify_squeeze((1, 2, 3, 4), 0)
     verify_squeeze((1, 2, 1, 4), None)
@@ -843,7 +774,7 @@ def test_squeeze():
     C = te.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')])
     for device in ['cuda', 'opencl']:
         ctx = tvm.context(device, 0)
-        if ctx.exist:
+        if tvm.testing.device_enabled(device):
             with tvm.target.create(device):
                 s = tvm.topi.testing.get_injective_schedule(device)(C)
                 func = tvm.build(s, [A, C])
@@ -853,6 +784,7 @@ def test_squeeze():
             assert c.asnumpy()[0] == 2
 
 
+@tvm.testing.uses_gpu
 def test_concatenate():
     verify_concatenate([(2,), (2,), (2,)], -1)
     verify_concatenate([(2, 3, 4), (2, 2, 4), (2, 5, 4)], 1)
@@ -865,6 +797,7 @@ def test_concatenate():
     verify_concatenate([(1, 14400), (1, 2400), (1, 640), (1, 240)], 1)
 
 
+@tvm.testing.uses_gpu
 def test_stack():
     verify_stack([(2,), (2,), (2,)], -1)
     verify_stack([(2,), (2,), (2,)], 1)
@@ -873,11 +806,13 @@ def test_stack():
     verify_stack([(2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4)], -1)
 
 
+@tvm.testing.uses_gpu
 def test_split():
     verify_split((2, 12, 3), 3, 1)
     verify_split((2, 12, 3), [2, 4], 1)
     verify_split((10, 12, 24), [5, 7, 9], -1)
 
+@tvm.testing.uses_gpu
 def test_flip():
     verify_flip((3, 4, 3), 1)
     verify_flip((3, 4, 3), 0)
@@ -886,12 +821,14 @@ def test_flip():
     verify_flip((3, 4, 3), -3)
     verify_flip((3, 4, 3), -2)
 
+@tvm.testing.requires_llvm
 def test_expand_like():
     verify_expand_like((3,), (2, 3), [0])
     verify_expand_like((2,), (2, 3), [1])
     verify_expand_like((3, 4), (3, 5, 4), [1])
     verify_expand_like((5, 7), (5, 6, 7, 8), [1, 3])
 
+@tvm.testing.uses_gpu
 def test_take():
     verify_take((4,), [1])
     verify_take((4,), [[0,1,2,3]])
@@ -911,6 +848,7 @@ def test_take():
     verify_take((3,4), [0, 2], axis=0, mode="fast")
     verify_take((3,4), [0, 2], axis=1, mode="fast")
 
+@tvm.testing.uses_gpu
 def test_gather():
     verify_gather([[1, 2], [3, 4]], 1, [[0, 0], [1, 0]])
     verify_gather(np.random.randn(4, 7, 5), 0, np.random.randint(low=0, high=4, size=(1, 7, 5)))
@@ -920,6 +858,7 @@ def test_gather():
     verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2)))
     verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10)))
 
+@tvm.testing.uses_gpu
 def test_gather_nd():
     for indices_dtype in ['int32', 'float32']:
         verify_gather_nd((4,), [[1.8]], indices_dtype)
@@ -935,6 +874,7 @@ def test_gather_nd():
         verify_gather_nd((2, 3, 4, 5), [[1, 0], [2, 1], [3, 2], [4, 2]],
                          indices_dtype)
 
+@tvm.testing.uses_gpu
 def test_arange():
     verify_arange(None, 20, None)
     verify_arange(None, 20, 2)
@@ -946,18 +886,21 @@ def test_arange():
     verify_arange(20, 1, -1)
     verify_arange(20, 1, -1.5)
 
+@tvm.testing.uses_gpu
 def test_repeat():
     verify_repeat((2,), 1, 0)
     verify_repeat((3, 2), 2, 0)
     verify_repeat((3, 2, 4), 3, 1)
     verify_repeat((1, 3, 2, 4), 4, -1)
 
+@tvm.testing.uses_gpu
 def test_tile():
     verify_tile((3, 2), (2, 3))
     verify_tile((3, 2, 5), (2,))
     verify_tile((3, ), (2, 3, 3))
     verify_tile((4, 0), (5,))
 
+@tvm.testing.uses_gpu
 def test_layout_transform():
     in_shape = (1, 32, 8, 8)
     A = te.placeholder(shape=in_shape, dtype="float32", name="A")
@@ -968,11 +911,7 @@ def test_layout_transform():
     output = np.reshape(output, newshape=(1, 8, 8, 2, 16))
     output = np.transpose(output, axes=(0, 3, 1, 2, 4))
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         tvm_input = tvm.nd.array(input, ctx)
         tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=B.dtype)
         print("Running on target: %s" % device)
@@ -982,10 +921,11 @@ def test_layout_transform():
         f(tvm_input, tvm_output)
         tvm.testing.assert_allclose(tvm_output.asnumpy(), output)
 
-    for backend in get_all_backend():
-        check_device(backend)
+    for backend, ctx in tvm.testing.enabled_targets():
+        check_device(backend, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_shape():
     in_shape = (8, 7, 13)
     dtype = "int32"
@@ -995,11 +935,7 @@ def test_shape():
     input = np.random.uniform(size=in_shape).astype(A.dtype)
     output = np.asarray(in_shape).astype(dtype)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         tvm_input = tvm.nd.array(input, ctx)
         tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=dtype)
         print("Running on target: %s" % device)
@@ -1009,10 +945,11 @@ def test_shape():
         f(tvm_input, tvm_output)
         tvm.testing.assert_allclose(tvm_output.asnumpy(), output)
 
-    for backend in get_all_backend():
-        check_device(backend)
+    for backend, ctx in tvm.testing.enabled_targets():
+        check_device(backend, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_sequence_mask():
     for in_shape in (5, 10), (3, 4, 5, 4):
         for axis in [0, 1]:
@@ -1026,11 +963,7 @@ def test_sequence_mask():
                 B_data = np.random.randint(1, max_length, (batch_size,)).astype(np.int32)
                 C_gt_data = tvm.topi.testing.sequence_mask(A_data, B_data, mask_value, axis)
 
-                def check_device(device):
-                    ctx = tvm.context(device, 0)
-                    if not ctx.exist:
-                        print("Skip because %s is not enabled" % device)
-                        return
+                def check_device(device, ctx):
                     tvm_A = tvm.nd.array(A_data, ctx)
                     tvm_B = tvm.nd.array(B_data, ctx)
                     tvm_C = tvm.nd.empty(in_shape, ctx=ctx, dtype="float32")
@@ -1040,9 +973,10 @@ def test_sequence_mask():
                     f = tvm.build(s, [A, B, C], device, name="SequenceMask")
                     f(tvm_A, tvm_B, tvm_C)
                     tvm.testing.assert_allclose(tvm_C.asnumpy(), C_gt_data)
-                for backend in get_all_backend():
-                    check_device(backend)
+                for backend, ctx in tvm.testing.enabled_targets():
+                    check_device(backend, ctx)
 
+@tvm.testing.uses_gpu
 def test_ndarray_size():
     in_shape = (5, 11, 7)
     dtype = "int32"
@@ -1052,11 +986,7 @@ def test_ndarray_size():
     input = np.random.uniform(size=in_shape).astype(A.dtype)
     output = np.asarray(np.size(input)).astype(dtype)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         tvm_input = tvm.nd.array(input, ctx=ctx)
         tvm_output = tvm.nd.empty((), ctx=ctx, dtype=B.dtype)
         print("Running on target: %s" % device)
@@ -1066,18 +996,15 @@ def test_ndarray_size():
         f(tvm_input, tvm_output)
         tvm.testing.assert_allclose(tvm_output.asnumpy(), output)
 
-    for backend in get_all_backend():
-        check_device(backend)
+    for backend, ctx in tvm.testing.enabled_targets():
+        check_device(backend, ctx)
 
 
+@tvm.testing.uses_gpu
 def test_where_fusion():
     """integration test that where and zeros should be properly inlined"""
-    def check_device(device):
+    def check_device(device, ctx):
         with tvm.target.create(device):
-            ctx = tvm.context(device, 0)
-            if not ctx.exist:
-                print("Skip because %s is not enabled" % device)
-                return
             print("Running on target: %s" % device)
             conv2d_compute, conv2d_schedule = tvm.topi.testing.get_conv2d_nchw_implement(device)
             data = te.placeholder((2, 1, 2, 4), 'int8', 'data')
@@ -1093,9 +1020,10 @@ def test_where_fusion():
             s = conv2d_schedule(outs)
             tvm.build(s, [data, w, add], target=backend)
 
-    for backend in get_all_backend():
-        check_device(backend)
+    for backend, ctx in tvm.testing.enabled_targets():
+        check_device(backend, ctx)
 
+@tvm.testing.uses_gpu
 def test_one_hot():
     verify_one_hot((3,), 3, 1, 0, -1, "int32")
     verify_one_hot((3,), 3, 1.0, 0.0, -1, "float32")
@@ -1105,6 +1033,7 @@ def test_one_hot():
     verify_one_hot((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
 
 
+@tvm.testing.uses_gpu
 def test_unravel_index():
     for dtype in ["int32", "int64"]:
         verify_unravel_index([0, 1, 2, 3], [2, 2], dtype)
@@ -1112,6 +1041,7 @@ def test_unravel_index():
         verify_unravel_index(144, [5, 5, 5, 2], dtype)
         verify_unravel_index([100, 13, 5], [5, 5, 5, 2], dtype)
 
+@tvm.testing.uses_gpu
 def test_sparse_to_dense():
     verify_sparse_to_dense(1, 3, 0, [5], [0, 3, 0, 0, 0]) #scalar
     verify_sparse_to_dense([0, 1, 4], [3, 3, 3], 0, [5], [3, 3, 0, 0, 3]) #vector
@@ -1134,6 +1064,7 @@ def test_sparse_to_dense():
     #sparse_indices should not be > 2d tensor
     #verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[3.1, 3.1, 3.1]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
+@tvm.testing.uses_gpu
 def test_matrix_set_diag():
     for dtype in ['float32', 'int32']:
         verify_matrix_set_diag((2, 2), dtype)
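
The hunks above all apply one mechanical conversion: the per-test check_device(device) helper that created a context and bailed out when the backend was missing becomes check_device(device, ctx), the loop over get_all_backend() becomes a loop over tvm.testing.enabled_targets(), and the test entry points are marked @tvm.testing.uses_gpu. A minimal sketch of the resulting test shape, using an illustrative transpose workload whose names and shapes are placeholders rather than code taken from this patch:

    import numpy as np
    import tvm
    import tvm.testing
    import tvm.topi.testing
    from tvm import te, topi


    @tvm.testing.uses_gpu
    def test_transpose_sketch():
        # Illustrative workload; the real tests construct their own operators.
        A = te.placeholder(shape=(3, 10, 2), name="A")
        B = topi.transpose(A, (1, 0, 2))

        def check_device(device, ctx):
            # No ctx.exist guard here: enabled_targets() only yields targets
            # that are both compiled into TVM and selected for this test run.
            print("Running on target: %s" % device)
            with tvm.target.create(device):
                s = tvm.topi.testing.get_injective_schedule(device)(B)
            foo = tvm.build(s, [A, B], device, name="transpose")
            data_npy = np.random.uniform(size=(3, 10, 2)).astype(A.dtype)
            out_npy = data_npy.transpose(1, 0, 2)
            data_nd = tvm.nd.array(data_npy, ctx)
            out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=B.dtype)
            foo(data_nd, out_nd)
            tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

        for device, ctx in tvm.testing.enabled_targets():
            check_device(device, ctx)

Because enabled_targets() already filters on what is compiled in and what the test run selects, the helper no longer needs its own skip-and-return branch.
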
index 04cc310..7861a29 100644 (file)
@@ -23,8 +23,6 @@ import tvm.topi.testing
 import math
 from tvm.topi.util import nchw_pack_layout
 
-from common import get_all_backend
-
 def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w,
                       layout='NCHW', method="nearest_neighbor",
                       in_batch_block = 0, in_channel_block = 0):
@@ -58,11 +56,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w,
     else:
         b_np = tvm.topi.testing.upsampling_python(a_np, (scale_h, scale_w), layout)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -73,9 +67,10 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w,
 
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_upsampling():
     # nearest_neighbor - NCHW
     verify_upsampling(8, 16, 32, 32, 2.0, 2.0)
@@ -141,11 +136,7 @@ def verify_upsampling3d(batch, in_channel, in_depth, in_height, in_width, scale_
     else:
         b_np = tvm.topi.testing.upsampling3d_python(a_np, (scale_d, scale_h, scale_w), layout)
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
+    def check_device(device, ctx):
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_injective_schedule(device)(B)
@@ -156,9 +147,10 @@ def verify_upsampling3d(batch, in_channel, in_depth, in_height, in_width, scale_
 
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
 
-    for device in get_all_backend():
-        check_device(device)
+    for device, ctx in tvm.testing.enabled_targets():
+        check_device(device, ctx)
 
+@tvm.testing.uses_gpu
 def test_upsampling3d():
     # nearest_neighbor - NCDHW
     verify_upsampling3d(8, 8, 16, 16, 16, 2.0, 2.0, 2.0)
index 345e7f9..a6287b1 100644 (file)
@@ -32,4 +32,4 @@ def test_get_shape():
     verify_get_shape((2, 3, 32, 32, 16, 8), "OIHW16i8o", "HWO8oI16i", (32, 32, 2, 8, 3, 16))
 
 if __name__ == "__main__":
-    test_get_shape()
\ No newline at end of file
+    test_get_shape()
index e0e2205..691dcdf 100644 (file)
@@ -26,6 +26,8 @@ import tvm.topi.testing
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.util import get_const_tuple
 from tvm.topi.vision import ssd, non_max_suppression, get_valid_counts
+import pytest
+import tvm.testing
 
 _get_valid_counts_implement = {
     "generic": (topi.vision.get_valid_counts, topi.generic.schedule_get_valid_counts),
@@ -88,7 +90,7 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -114,16 +116,13 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index):
             tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3)
             tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3)
 
-    """ Skip this test as it is intermittent
-        see https://github.com/apache/incubator-tvm/pull/4901#issuecomment-595040094
     for device in ['llvm', 'cuda', 'opencl']:
-        # Disable gpu test for now
-        if device != "llvm":
-            continue
         check_device(device)
-    """
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.skip("Skip this test as it is intermittent."
+                  "See https://github.com/apache/incubator-tvm/pull/4901#issuecomment-595040094")
 def test_get_valid_counts():
     verify_get_valid_counts((1, 1000, 5), 0.5, -1, 0)
     verify_get_valid_counts((1, 2500, 6), 0, 0, 1)
@@ -143,7 +142,7 @@ def verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, n
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -179,7 +178,7 @@ def verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, n
     for device in ['llvm', 'cuda', 'opencl']:
         check_device(device)
 
-
+@tvm.testing.uses_gpu
 def test_non_max_suppression():
     np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80],
                          [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79],
@@ -247,7 +246,7 @@ def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offse
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -267,12 +266,14 @@ def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offse
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_multibox_prior():
     verify_multibox_prior((1, 3, 50, 50))
     verify_multibox_prior((1, 3, 224, 224), sizes=(0.5, 0.25, 0.1), ratios=(1, 2, 0.5))
     verify_multibox_prior((1, 32, 32, 32), sizes=(0.5, 0.25), ratios=(1, 2), steps=(2, 2), clip=True)
 
 
+@tvm.testing.uses_gpu
 def test_multibox_detection():
     batch_size = 1
     num_anchors = 3
@@ -292,7 +293,7 @@ def test_multibox_detection():
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -336,7 +337,7 @@ def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_s
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -359,6 +360,7 @@ def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_s
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_roi_align():
     verify_roi_align(1, 16, 32, 64, 7, 1.0, -1)
     verify_roi_align(4, 16, 32, 64, 7, 0.5, 2)
@@ -387,7 +389,7 @@ def verify_roi_pool(batch, in_channel, in_size, num_roi, pooled_size, spatial_sc
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -409,6 +411,7 @@ def verify_roi_pool(batch, in_channel, in_size, num_roi, pooled_size, spatial_sc
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_roi_pool():
     verify_roi_pool(1, 4, 16, 32, 7, 1.0)
     verify_roi_pool(4, 4, 16, 32, 7, 0.5)
@@ -421,7 +424,7 @@ def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -441,6 +444,7 @@ def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs):
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_proposal():
     attrs = {'scales': (0.5,),'ratios': (0.5,),
         'feature_stride': 16,
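
In this vision test file the inner helpers keep their explicit ['llvm', 'cuda', 'opencl'] lists, so only the guard changes from ctx.exist to tvm.testing.device_enabled(device), the top-level tests gain @tvm.testing.uses_gpu, and the intermittent get_valid_counts test is skipped with a regular pytest marker instead of a commented-out loop. A hedged sketch of that guard shape, with a placeholder verification body:

    import tvm
    import tvm.testing


    def _verify_on(device):
        # Guard first; the context is only created once the device is known
        # to be enabled for this test run.
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.context(device, 0)
        print("Running on target: %s" % device)
        # Placeholder: build, run and compare against the numpy reference.


    @tvm.testing.uses_gpu
    def test_vision_op_sketch():
        for device in ["llvm", "cuda", "opencl"]:
            _verify_on(device)
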
index 9282667..c94515a 100644 (file)
@@ -21,6 +21,7 @@ import tvm
 from tvm import topi
 from tvm import te, auto_scheduler
 import tempfile
+import tvm.testing
 
 from test_auto_scheduler_common import matmul_auto_scheduler_test, get_tiled_matmul
 
@@ -46,7 +47,7 @@ def record_common(dag, s):
 
 
 def test_record_split_reorder_fuse_annotation():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
 
     A = te.placeholder((512, 512), name='A')
@@ -80,7 +81,7 @@ def test_record_split_reorder_fuse_annotation():
 
 
 def test_record_compute_at_root_inline_cache_read_write():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
 
     A = te.placeholder((512, 512), name='A')
@@ -108,7 +109,7 @@ def test_record_compute_at_root_inline_cache_read_write():
 
 
 def test_record_follow_split_follow_fused_split():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
 
     A = te.placeholder((512, 512), name='A')
@@ -142,7 +143,7 @@ def test_record_follow_split_follow_fused_split():
 
 
 def test_record_pragma_storage_align_rfactor():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
 
     A = te.placeholder((512, 512), name='A')
@@ -165,7 +166,7 @@ def test_record_pragma_storage_align_rfactor():
 
 
 def test_measure_local_builder_runner():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
 
     dag, s0 = get_tiled_matmul()
@@ -183,7 +184,7 @@ def test_measure_local_builder_runner():
 
 
 def test_measure_local_builder_rpc_runner():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
 
     dag, s0 = get_tiled_matmul()
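
The record and measure tests above keep their early-return style; only the predicate changes. tvm.runtime.enabled("llvm") asks whether the backend was compiled into TVM, while tvm.testing.device_enabled("llvm") additionally honours the runtime device selection, so these LLVM-only tests stop running redundantly on GPU test nodes. A minimal sketch, with a hypothetical body that is not taken from the patch:

    import tvm
    import tvm.testing
    from tvm import te


    def test_llvm_only_sketch():
        if not tvm.testing.device_enabled("llvm"):
            return
        # Hypothetical body: build a trivial kernel for the llvm target.
        A = te.placeholder((16,), name="A")
        B = te.compute((16,), lambda i: A[i] + 1, name="B")
        s = te.create_schedule(B.op)
        tvm.build(s, [A, B], "llvm")
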
index 21ac984..bf7cefa 100644 (file)
@@ -54,6 +54,9 @@ def search_common(workload=matmul_auto_scheduler_test, target="llvm",
         tuning_options = auto_scheduler.TuningOptions(num_measure_trials=num_measure_trials,
                 runner=runner, verbose=1, measure_callbacks=[auto_scheduler.RecordToFile(log_file)])
         sch, args = auto_scheduler.auto_schedule(task, search_policy, tuning_options)
+        print("*"*80)
+        print(target)
+        print("*"*80)
         inp, res = auto_scheduler.load_best(log_file, workload_key, target)
 
         print("==== Python Code ====")
@@ -78,9 +81,8 @@ def search_common(workload=matmul_auto_scheduler_test, target="llvm",
     print()
 
 
+@tvm.testing.requires_llvm
 def test_workload_registry_search_basic():
-    if not tvm.runtime.enabled("llvm"):
-        return
     # wrap the search in a new thread to avoid the conflict
     # between python's multiprocessing and tvm's thread pool
     t = PropagatingThread(target=search_common, kwargs={'seed': 944563397})
@@ -96,9 +98,8 @@ def test_workload_registry_search_basic():
     t.join()
 
 
+@tvm.testing.requires_llvm
 def test_sketch_search_policy_basic():
-    if not tvm.runtime.enabled("llvm"):
-        return
     # wrap the search in a new thread to avoid the conflict
     # between python's multiprocessing and tvm's thread pool
     t = PropagatingThread(target=search_common,
@@ -107,9 +108,8 @@ def test_sketch_search_policy_basic():
     t.join()
 
 
+@tvm.testing.requires_llvm
 def test_sketch_search_policy_xgbmodel():
-    if not tvm.runtime.enabled("llvm"):
-        return
     # wrap the search in a new thread to avoid the conflict
     # between python's multiprocessing and tvm's thread pool
     t = PropagatingThread(target=search_common,
@@ -119,9 +119,8 @@ def test_sketch_search_policy_xgbmodel():
     t.join()
 
 
+@tvm.testing.requires_cuda
 def test_sketch_search_policy_cuda_rpc_runner():
-    if not tvm.runtime.enabled("cuda"):
-        return
     measure_ctx = auto_scheduler.LocalRPCMeasureContext()
     # wrap the search in a new thread to avoid the conflict
     # between python's multiprocessing and tvm's thread pool
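
In these auto-scheduler tests the guards are dropped entirely in favour of @tvm.testing.requires_llvm and @tvm.testing.requires_cuda, so pytest reports a skip rather than a silently passing empty test. A sketch of both markers on hypothetical tests (the bodies are placeholders, not code from the patch):

    import tvm
    import tvm.testing
    from tvm import te


    @tvm.testing.requires_llvm
    def test_needs_llvm_sketch():
        # Runs only when llvm is compiled in and not filtered out at runtime.
        A = te.placeholder((8,), name="A")
        B = te.compute((8,), lambda i: A[i] * 2, name="B")
        tvm.build(te.create_schedule(B.op), [A, B], "llvm")


    @tvm.testing.requires_cuda
    def test_needs_cuda_sketch():
        # Stacked with @tvm.testing.requires_gpu elsewhere in this patch when
        # a physical device is needed; here we only touch the context.
        ctx = tvm.context("cuda", 0)
        print("cuda context:", ctx)
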
index f518866..c35a3f7 100644 (file)
@@ -18,6 +18,7 @@
 """ Test sketch generation. """
 
 import tvm
+import tvm.testing
 from tvm import te, auto_scheduler
 from tvm.auto_scheduler import _ffi_api
 from tvm.auto_scheduler.loop_state import Stage
@@ -233,10 +234,8 @@ def test_cpu_conv2d_winograd_sketch():
     assert sketches[1] != sketches[2]
 
 
+@tvm.testing.requires_cuda
 def test_cuda_matmul_sketch():
-    if not tvm.context("cuda", 0).exist:
-        return
-
     sketches = generate_sketches(matmul_auto_scheduler_test, (512, 512, 512), 'cuda')
     ''' 1 multi-level tiling sketch '''
     assert len(sketches) == 1
@@ -265,10 +264,8 @@ def test_cuda_matmul_sketch():
     assert_is_tiled(sketches[1].stages[5])
 
 
+@tvm.testing.requires_cuda
 def test_cuda_conv2d_bn_relu_sketch():
-    if not tvm.context("cuda", 0).exist:
-        return
-
     sketches = generate_sketches(conv2d_nchw_bn_relu_auto_scheduler_test,
                                  (1, 56, 56, 512, 512, 3, 1, 1), 'cuda')
     ''' 1 multi-level tiling sketch '''
@@ -286,20 +283,16 @@ def test_cuda_conv2d_bn_relu_sketch():
     assert_is_tiled(sketches[0].stages[12])
 
 
+@tvm.testing.requires_cuda
 def test_cuda_max_pool2d_sketch():
-    if not tvm.context("cuda", 0).exist:
-        return
-
     sketches = generate_sketches(max_pool2d_auto_scheduler_test, (1, 56, 56, 512, 0), 'cuda')
     ''' 1 default sketch '''
     assert len(sketches) == 1
     assert len(sketches[0].transform_steps) == 0
 
 
+@tvm.testing.requires_cuda
 def test_cuda_min_sketch():
-    if not tvm.context("cuda", 0).exist:
-        return
-
     sketches = generate_sketches(min_nm_auto_scheduler_test, (10, 1024), 'cuda')
     ''' 1 cross thread reuction sketch + 1 default sketch '''
     assert len(sketches) == 2
@@ -309,10 +302,8 @@ def test_cuda_min_sketch():
     assert len(sketches[1].transform_steps) == 0
 
 
+@tvm.testing.requires_cuda
 def test_cuda_softmax_sketch():
-    if not tvm.context("cuda", 0).exist:
-        return
-
     sketches = generate_sketches(softmax_nm_auto_scheduler_test, (2, 1024), 'cuda')
     ''' (1 cross thread reuction sketch + 1 default sketch) * (1 cross thread reuction sketch + 1 default sketch) '''
     assert len(sketches) == (2 * 2)
@@ -346,10 +337,8 @@ def test_cuda_softmax_sketch():
     assert_compute_at_condition(sketches[3].stages[2], "inlined")
 
 
+@tvm.testing.requires_cuda
 def test_cuda_conv2d_winograd_sketch():
-    if not tvm.context("cuda", 0).exist:
-        return
-
     sketches = generate_sketches(conv2d_winograd_nhwc_auto_scheduler_test,
                                  (1, 28, 28, 128, 128, 3, 1, 1), 'cuda')
     ''' 1 multi-level tiling sketch '''
index c7fa2ea..2875fd7 100644 (file)
@@ -65,4 +65,4 @@ def test_random_tuner():
 
 if __name__ == '__main__':
     test_gridsearch_tuner()
-    test_random_tuner()
\ No newline at end of file
+    test_random_tuner()
index dd5d708..0dfdbbd 100644 (file)
@@ -102,4 +102,4 @@ if __name__ == '__main__':
     wrap_error(Module4, 60)
     wrap_error(Module5, 70)
     wrap_error(Module6, 77)
-    wrap_error(Module7, 84)
\ No newline at end of file
+    wrap_error(Module7, 84)
index ee2cd71..d718f20 100644 (file)
@@ -21,6 +21,7 @@ import json
 from tvm import rpc
 from tvm.contrib import util, graph_runtime
 
+@tvm.testing.requires_llvm
 def test_graph_simple():
     n = 4
     A = te.placeholder((n,), name='A')
@@ -52,9 +53,6 @@ def test_graph_simple():
     graph = json.dumps(graph)
 
     def check_verify():
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
         mlib = tvm.build(s, [A, B], "llvm", name="myadd")
         mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
         a = np.random.uniform(size=(n,)).astype(A.dtype)
@@ -63,9 +61,6 @@ def test_graph_simple():
         np.testing.assert_equal(out.asnumpy(), a + 1)
 
     def check_remote():
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
         mlib = tvm.build(s, [A, B], "llvm", name="myadd")
         server = rpc.Server("localhost")
         remote = rpc.connect(server.host, server.port)
@@ -93,9 +88,6 @@ def test_graph_simple():
         params = {'x': x_in}
         graph, lib, params = relay.build(func, target="llvm", params=params)
 
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
         mod_shared = graph_runtime.create(graph, lib, tvm.cpu(0))
         mod_shared.load_params(relay.save_param_dict(params))
         num_mods = 10
index ce47b16..f284ba6 100644 (file)
@@ -23,6 +23,7 @@ from tvm import rpc
 from tvm.contrib import util
 from tvm.contrib.debugger import debug_runtime as graph_runtime
 
+@tvm.testing.requires_llvm
 def test_graph_simple():
     n = 4
     A = te.placeholder((n,), name='A')
@@ -54,9 +55,6 @@ def test_graph_simple():
     graph = json.dumps(graph)
 
     def check_verify():
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
         mlib = tvm.build(s, [A, B], "llvm", name="myadd")
         try:
             mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
@@ -115,9 +113,6 @@ def test_graph_simple():
         assert(not os.path.exists(directory))
 
     def check_remote():
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
         mlib = tvm.build(s, [A, B], "llvm", name="myadd")
         server = rpc.Server("localhost")
         remote = rpc.connect(server.host, server.port)
index 56ae250..512fefd 100644 (file)
@@ -20,6 +20,7 @@ from tvm.relay import testing
 import tvm
 from tvm.contrib import graph_runtime
 from tvm.contrib.debugger import debug_runtime
+import tvm.testing
 
 def input_shape(mod):
     return [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
@@ -42,7 +43,7 @@ def verify(data):
     return out
 
 def test_legacy_compatibility():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
     mod, params = relay.testing.synthetic.get_workload()
@@ -58,7 +59,7 @@ def test_legacy_compatibility():
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
 def test_cpu():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
     mod, params = relay.testing.synthetic.get_workload()
@@ -83,10 +84,9 @@ def test_cpu():
     out = gmod.get_output(0).asnumpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
 def test_gpu():
-    if not tvm.runtime.enabled("cuda"):
-        print("Skip because cuda is not enabled")
-        return
     mod, params = relay.testing.synthetic.get_workload()
     with relay.build_config(opt_level=3):
         complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)
@@ -110,9 +110,10 @@ def test_gpu():
     out = gmod.get_output(0).asnumpy()
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
+@tvm.testing.uses_gpu
 def test_mod_export():
     def verify_cpu_export(obj_format):
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             print("Skip because llvm is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -150,7 +151,7 @@ def test_mod_export():
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
     def verify_gpu_export(obj_format):
-        if not tvm.runtime.enabled("cuda"):
+        if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -188,7 +189,7 @@ def test_mod_export():
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
     def verify_rpc_cpu_export(obj_format):
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             print("Skip because llvm is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -230,7 +231,7 @@ def test_mod_export():
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
     def verify_rpc_gpu_export(obj_format):
-        if not tvm.runtime.enabled("cuda"):
+        if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -278,9 +279,10 @@ def test_mod_export():
         verify_rpc_cpu_export(obj_format)
         verify_rpc_gpu_export(obj_format)
 
+@tvm.testing.uses_gpu
 def test_remove_package_params():
     def verify_cpu_remove_package_params(obj_format):
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             print("Skip because llvm is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -326,7 +328,7 @@ def test_remove_package_params():
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
     def verify_gpu_remove_package_params(obj_format):
-        if not tvm.runtime.enabled("cuda"):
+        if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -372,7 +374,7 @@ def test_remove_package_params():
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
     def verify_rpc_cpu_remove_package_params(obj_format):
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             print("Skip because llvm is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -423,7 +425,7 @@ def test_remove_package_params():
         tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
     def verify_rpc_gpu_remove_package_params(obj_format):
-        if not tvm.runtime.enabled("cuda"):
+        if not tvm.testing.device_enabled("cuda"):
             print("Skip because cuda is not enabled")
             return
         mod, params = relay.testing.synthetic.get_workload()
@@ -480,7 +482,7 @@ def test_remove_package_params():
         verify_rpc_gpu_remove_package_params(obj_format)
 
 def test_debug_graph_runtime():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
     mod, params = relay.testing.synthetic.get_workload()
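
test_mod_export and test_remove_package_params exercise several backends from a single function, so the per-backend device_enabled guards stay inside while the function itself is only marked @tvm.testing.uses_gpu; device-specific tests such as test_gpu get the stricter requires_* markers instead. A small sketch of the mixed-backend shape, with a placeholder verify step:

    import tvm
    import tvm.testing


    @tvm.testing.uses_gpu
    def test_mixed_backends_sketch():
        def verify(device):
            if not tvm.testing.device_enabled(device):
                print("Skip because %s is not enabled" % device)
                return
            ctx = tvm.context(device, 0)
            # Placeholder: build, export and reload the module on this device.
            print("verifying on", device, ctx)

        for device in ["llvm", "cuda"]:
            verify(device)
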
index 9a859da..bc5e7fb 100644 (file)
@@ -19,6 +19,8 @@ from tvm.relay import testing
 import tvm
 from tvm import te
 
+import tvm.testing
+
 from tvm.contrib import util
 header_file_dir_path = util.tempdir()
 
@@ -59,10 +61,11 @@ def generate_engine_module():
     return csource_module
 
 
+@tvm.testing.uses_gpu
 def test_mod_export():
     def verify_gpu_mod_export(obj_format):
         for device in ["llvm", "cuda"]:
-            if not tvm.runtime.enabled(device):
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled..." % device)
                 return
 
@@ -89,7 +92,7 @@ def test_mod_export():
 
     def verify_multi_dso_mod_export(obj_format):
         for device in ["llvm"]:
-            if not tvm.runtime.enabled(device):
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled..." % device)
                 return
 
@@ -117,7 +120,7 @@ def test_mod_export():
 
     def verify_json_import_dso(obj_format):
         for device in ["llvm"]:
-            if not tvm.runtime.enabled(device):
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled..." % device)
                 return
 
@@ -173,7 +176,7 @@ def test_mod_export():
             print("Skip test because gcc is not available.")
 
         for device in ["llvm"]:
-            if not tvm.runtime.enabled(device):
+            if not tvm.testing.device_enabled(device):
                 print("skip because %s is not enabled..." % device)
                 return
 
index c7a5544..6e7df06 100644 (file)
@@ -22,6 +22,7 @@ import os
 import sys
 import numpy as np
 import subprocess
+import tvm.testing
 
 runtime_py = """
 import os
@@ -42,7 +43,7 @@ print("Finish runtime checking...")
 """
 
 def test_dso_module_load():
-    if not tvm.runtime.enabled("llvm"):
+    if not tvm.testing.device_enabled("llvm"):
         return
     dtype = 'int64'
     temp = util.tempdir()
@@ -90,6 +91,7 @@ def test_dso_module_load():
         shell=True)
 
 
+@tvm.testing.requires_gpu
 def test_device_module_dump():
     # graph
     n = tvm.runtime.convert(1024)
@@ -104,7 +106,7 @@ def test_device_module_dump():
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         temp = util.tempdir()
@@ -132,7 +134,7 @@ def test_device_module_dump():
 
     def check_stackvm(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         temp = util.tempdir()
@@ -161,7 +163,7 @@ def test_combine_module_llvm():
 
     def check_llvm():
         ctx = tvm.cpu(0)
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             print("Skip because llvm is not enabled" )
             return
         temp = util.tempdir()
@@ -186,7 +188,7 @@ def test_combine_module_llvm():
 
     def check_system_lib():
         ctx = tvm.cpu(0)
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             print("Skip because llvm is not enabled" )
             return
         temp = util.tempdir()
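
test_device_module_dump only does useful work when some GPU backend is present, so it is marked @tvm.testing.requires_gpu rather than uses_gpu, while each candidate backend inside is still gated individually. A hedged sketch of that combination; the backend list and body are illustrative:

    import tvm
    import tvm.testing


    @tvm.testing.requires_gpu
    def test_gpu_backends_sketch():
        # requires_gpu guarantees some physical GPU; individual backends
        # (cuda, opencl, ...) may still be disabled in this build or run.
        for device in ["cuda", "opencl", "vulkan"]:
            if not tvm.testing.device_enabled(device):
                print("Skip because %s is not enabled" % device)
                continue
            ctx = tvm.context(device, 0)
            # Placeholder: dump the built module and reload it on this device.
            print("would dump and reload on", device, ctx)
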
index 3631295..bda987d 100644 (file)
 import tvm
 from tvm import te
 import numpy as np
-
-def enabled_ctx_list():
-    ctx_list = [('cpu', tvm.cpu(0)),
-                ('gpu', tvm.gpu(0)),
-                ('cl', tvm.opencl(0)),
-                ('metal', tvm.metal(0)),
-                ('rocm', tvm.rocm(0)),
-                ('vulkan', tvm.vulkan(0)),
-                ('vpi', tvm.vpi(0))]
-    for k, v  in ctx_list:
-        assert tvm.context(k, 0) == v
-    ctx_list = [x[1] for x in ctx_list if x[1].exist]
-    return ctx_list
-
-ENABLED_CTX_LIST = enabled_ctx_list()
-print("Testing using contexts:", ENABLED_CTX_LIST)
+import tvm.testing
 
 
+@tvm.testing.uses_gpu
 def test_nd_create():
-    for ctx in ENABLED_CTX_LIST:
+    for target, ctx in tvm.testing.enabled_targets():
         for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32",
                       "float32"]:
             x = np.random.randint(0, 10, size=(3, 4))
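
The hand-maintained ENABLED_CTX_LIST above is removed; tvm.testing.enabled_targets() now supplies matching (target, ctx) pairs directly, so the NDArray round-trip check simply loops over it. A short sketch:

    import numpy as np
    import tvm
    import tvm.testing


    @tvm.testing.uses_gpu
    def test_nd_roundtrip_sketch():
        for target, ctx in tvm.testing.enabled_targets():
            x = np.random.randint(0, 10, size=(3, 4)).astype("int32")
            y = tvm.nd.array(x, ctx)                 # host -> device copy
            np.testing.assert_equal(y.asnumpy(), x)  # device -> host round trip
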
index 7f01f88..50c753f 100644 (file)
@@ -179,6 +179,7 @@ def test_rpc_file_exchange():
     rev = remote.download("dat.bin")
     assert(rev == blob)
 
+@tvm.testing.requires_llvm
 def test_rpc_remote_module():
     if not tvm.runtime.enabled("rpc"):
         return
@@ -197,9 +198,6 @@ def test_rpc_remote_module():
         "rpc.Connect", server1.host, server1.port, "x1"])
 
     def check_remote(remote):
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
         temp = util.tempdir()
         ctx = remote.cpu(0)
         f = tvm.build(s, [A, B], "llvm", name="myadd")
@@ -215,9 +213,6 @@ def test_rpc_remote_module():
         np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
 
     def check_minrpc():
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
         if tvm.get_global_func("rpc.PopenSession", allow_missing=True) is None:
             return
         # export to minrpc
@@ -254,10 +249,7 @@ def test_rpc_remote_module():
         runtime initializes. We leave it as an example
         on how to do rpc when we want to do linking on remote.
         """
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled")
-            return
-        if not tvm.runtime.enabled("opencl"):
+        if not tvm.testing.device_enabled("opencl"):
             print("Skip because opencl is not enabled")
             return
         temp = util.tempdir()
index 0059083..758643d 100644 (file)
@@ -22,10 +22,12 @@ from tvm.contrib import graph_runtime
 import tvm
 from tvm import te
 import ctypes
+import tvm.testing
 
+@tvm.testing.uses_gpu
 def test_synthetic():
     for device in ["llvm", "cuda"]:
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled..." % device)
             return
 
@@ -70,10 +72,11 @@ def test_synthetic():
     tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
 
 
+@tvm.testing.uses_gpu
 def test_cuda_lib():
     ctx = tvm.gpu(0)
     for device in ["llvm", "cuda"]:
-        if not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("skip because %s is not enabled..." % device)
             return
     nn = 12
@@ -99,4 +102,4 @@ def test_cuda_lib():
 
 if __name__ == "__main__":
     test_synthetic()
-    #test_system_lib()
+    test_cuda_lib()
index cdb343f..f8d6e32 100644 (file)
@@ -19,7 +19,9 @@
 import tvm
 from tvm import te
 import numpy as np
+import tvm.testing
 
+@tvm.testing.uses_gpu
 def test_cmp_load_store():
     n = 32
     A = te.placeholder((n,), name='A')
@@ -30,7 +32,7 @@ def test_cmp_load_store():
 
 
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
+        if not tvm.testing.device_enabled("llvm"):
             return
         s = te.create_schedule(D.op)
         xo, xi = s[C].split(C.op.axis[0], factor=4)
@@ -48,9 +50,9 @@ def test_cmp_load_store():
             d.asnumpy(), np.logical_and(a.asnumpy() > b.asnumpy(), a.asnumpy() > 1).astype('float32'))
 
     def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             return
+        ctx = tvm.context(device, 0)
         s = te.create_schedule(D.op)
         for stage in [C, D]:
             xo, xi = s[stage].split(stage.op.axis[0], factor=4)
index 3ea413c..64a10d8 100644 (file)
@@ -23,6 +23,7 @@ from tvm import rpc
 from tvm.contrib import util, cc
 import numpy as np
 
+@tvm.testing.requires_llvm
 def test_llvm_add_pipeline():
     nn = 1024
     n = tvm.runtime.convert(nn)
@@ -43,9 +44,6 @@ def test_llvm_add_pipeline():
             assert struct.unpack(endian + 'h', arr[0x12:0x14])[0] == e_machine
 
     def build_i386():
-        if not tvm.runtime.enabled("llvm"):
-            print("Skip because llvm is not enabled..")
-            return
         temp = util.tempdir()
         target = "llvm -mtriple=i386-pc-linux-gnu"
         f = tvm.build(s, [A, B, C], target)
index 7fdd259..567f5ea 100644 (file)
@@ -22,16 +22,16 @@ from tvm import topi
 import unittest
 from tvm.contrib.nvcc import have_fp16, have_int8
 from tvm.contrib import nvcc
+import tvm.testing
 
 tx = te.thread_axis("threadIdx.x")
 bx = te.thread_axis("blockIdx.x")
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_vectorize_add():
     num_thread = 8
     def check_cuda(dtype, n, lanes):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because gpu does not have fp16 support")
             return
@@ -66,12 +66,11 @@ def test_cuda_vectorize_add():
     check_cuda("float16", 64, 6)
     check_cuda("float16", 64, 8)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_multiply_add():
     num_thread = 8
     def check_cuda(dtype, n, lanes):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version):
             print("skip because gpu does not support int8")
             return
@@ -98,12 +97,11 @@ def test_cuda_multiply_add():
         tvm.testing.assert_allclose(d.asnumpy(), np_d)
     check_cuda("int8", 64, 4)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_vectorize_load():
     num_thread = 8
     def check_cuda(dtype, n, lanes):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         ctx = tvm.gpu(0)
         A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
         B = te.compute((n,), lambda i: A[i], name='B')
@@ -123,11 +121,10 @@ def test_cuda_vectorize_load():
     check_cuda("int8", 64, 8)
     check_cuda("int8", 64, 16)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_make_int8():
     def check_cuda(n, value, lanes):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         dtype = 'int8'
         ctx = tvm.gpu(0)
         A = te.compute((n, lanes), lambda i,j: tvm.tir.const(value, dtype=dtype))
@@ -151,6 +148,8 @@ def test_cuda_make_int8():
     check_cuda(64, -3, 2)
 
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_inf_nan():
     target = 'cuda'
     def check_inf_nan(ctx, n, value, dtype):
@@ -165,10 +164,6 @@ def test_cuda_inf_nan():
         # Only need to test compiling here
         fun(a, c)
 
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-
     ctx = tvm.context(target, 0)
 
     check_inf_nan(ctx, 1, -float('inf'), 'float32')
@@ -179,11 +174,9 @@ def test_cuda_inf_nan():
     check_inf_nan(ctx, 1, float('nan'), 'float64')
 
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_shuffle():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-
     idxm = tvm.tir.indexmod
     a = te.placeholder((64, ), 'int32')
     b = te.placeholder((64, ), 'int32')
@@ -227,99 +220,82 @@ def test_cuda_shuffle():
         module(nda, ndb, ndc)
         tvm.testing.assert_allclose(ndc.asnumpy(), ref)
 
-def test_crossthread_reduction1():
-    def check(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist or not tvm.runtime.enabled(device):
-            print("skip because", device, "is not enabled..")
-            return
-        n = te.var("n")
-        m = te.var("m")
-        A = te.placeholder((n, m), name='A')
-        k = te.reduce_axis((0, m), "m")
-        B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
-
-        def sched(nthd):
-            s = te.create_schedule(B.op)
-            ko, _ = s[B].split(B.op.reduce_axis[0], nparts=nthd)
-            s[B].bind(ko, te.thread_axis("threadIdx.x"))
-            s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
-            func = tvm.build(s, [A, B], device)
-            return func
-
-        def verify(nthd):
-            func = sched(nthd)
-            nn = 3
-            # checks three typical cases
-            vals = [nthd-1, nthd, nthd+1]
-            for kk in [x for x in vals]:
-                size = (nn, kk)
-                a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
-                b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
-                func(a, b)
-                tvm.testing.assert_allclose(b.asnumpy(), \
-                    np.sum(a.asnumpy(), axis=1), rtol=1e-3)
-
-        verify(16)
-        verify(32)
-        verify(64)
-
-    check("cuda")
-    check("rocm")
-
-
-def test_crossthread_reduction2():
-    def check(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist or not tvm.runtime.enabled(device):
-            print("skip because", device, "is not enabled..")
-            return
-
-        n = te.var("n")
-        k0 = te.var("k0")
-        k1 = te.var("k1")
-        A = te.placeholder((n, k0, k1), name='A')
-        k0 = te.reduce_axis((0, k0), "k0")
-        k1 = te.reduce_axis((0, k1), "k1")
-        B = te.compute((n,), lambda i: te.sum(A[i, k0, k1], axis=(k0, k1)), name="B")
-
-        def sched(nthdx, nthdy):
-            s = te.create_schedule(B.op)
-            k0o, _ = s[B].split(B.op.reduce_axis[0], nparts=nthdx)
-            k1o, _ = s[B].split(B.op.reduce_axis[1], nparts=nthdy)
-            s[B].bind(k0o, te.thread_axis("threadIdx.x"))
-            s[B].bind(k1o, te.thread_axis("threadIdx.y"))
-            s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
-            func = tvm.build(s, [A, B], device)
-            return func
-
-        def verify(nthdx, nthdy):
-            func = sched(nthdx, nthdy)
-            nn = 3
-            # checks three typical cases
-            vx = [nthdx-1, nthdx, nthdx+1]
-            vy = [nthdy-1, nthdy, nthdy+1]
-            for kk0, kk1 in [(x, y) for x in vx for y in vy]:
-                size = (nn, kk0, kk1)
-                a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
-                b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
-                func(a, b)
-                tvm.testing.assert_allclose(b.asnumpy(), \
-                    np.sum(a.asnumpy(), axis=(1, 2)), rtol=1e-3)
-
-        verify(16, 16)
-        verify(32, 32)
-        verify(16, 32)
-        verify(32, 16)
-
-    check("cuda")
-    check("rocm")
+@tvm.testing.parametrize_targets("cuda", "rocm")
+def test_crossthread_reduction1(target, ctx):
+    n = te.var("n")
+    m = te.var("m")
+    A = te.placeholder((n, m), name='A')
+    k = te.reduce_axis((0, m), "m")
+    B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
 
+    def sched(nthd):
+        s = te.create_schedule(B.op)
+        ko, _ = s[B].split(B.op.reduce_axis[0], nparts=nthd)
+        s[B].bind(ko, te.thread_axis("threadIdx.x"))
+        s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
+        func = tvm.build(s, [A, B], target)
+        return func
+
+    def verify(nthd):
+        func = sched(nthd)
+        nn = 3
+        # checks three typical cases
+        vals = [nthd-1, nthd, nthd+1]
+        for kk in [x for x in vals]:
+            size = (nn, kk)
+            a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
+            b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
+            func(a, b)
+            tvm.testing.assert_allclose(b.asnumpy(), \
+                np.sum(a.asnumpy(), axis=1), rtol=1e-3)
+
+    verify(16)
+    verify(32)
+    verify(64)
+
+
+@tvm.testing.parametrize_targets("cuda", "rocm")
+def test_crossthread_reduction2(target, ctx):
+    n = te.var("n")
+    k0 = te.var("k0")
+    k1 = te.var("k1")
+    A = te.placeholder((n, k0, k1), name='A')
+    k0 = te.reduce_axis((0, k0), "k0")
+    k1 = te.reduce_axis((0, k1), "k1")
+    B = te.compute((n,), lambda i: te.sum(A[i, k0, k1], axis=(k0, k1)), name="B")
+
+    def sched(nthdx, nthdy):
+        s = te.create_schedule(B.op)
+        k0o, _ = s[B].split(B.op.reduce_axis[0], nparts=nthdx)
+        k1o, _ = s[B].split(B.op.reduce_axis[1], nparts=nthdy)
+        s[B].bind(k0o, te.thread_axis("threadIdx.x"))
+        s[B].bind(k1o, te.thread_axis("threadIdx.y"))
+        s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
+        func = tvm.build(s, [A, B], target)
+        return func
+
+    def verify(nthdx, nthdy):
+        func = sched(nthdx, nthdy)
+        nn = 3
+        # checks three typical cases
+        vx = [nthdx-1, nthdx, nthdx+1]
+        vy = [nthdy-1, nthdy, nthdy+1]
+        for kk0, kk1 in [(x, y) for x in vx for y in vy]:
+            size = (nn, kk0, kk1)
+            a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
+            b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
+            func(a, b)
+            tvm.testing.assert_allclose(b.asnumpy(), \
+                np.sum(a.asnumpy(), axis=(1, 2)), rtol=1e-3)
+
+    verify(16, 16)
+    verify(32, 32)
+    verify(16, 32)
+    verify(32, 16)
+
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_reduction_binding():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-
     k = te.reduce_axis((0, 32), 'k')
     A = te.placeholder((96, 32), name='A')
     B = te.compute( (96,), lambda m:
@@ -334,46 +310,39 @@ def test_cuda_reduction_binding():
 
     fcuda = tvm.build(s, [A, B], "cuda")
 
-def test_rfactor_predicates():
-    def check(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist or not tvm.runtime.enabled(device):
-            print("skip because", device, "is not enabled..")
-            return
-
-        n = te.reduce_axis((0, 129), 'n')
-        A = te.placeholder((129,), name='A')
-        B = te.compute( (1, ), lambda b:
-                         te.sum(A[n],
-                                 axis=n),
-                         name='B'
-        )
+@tvm.testing.parametrize_targets("cuda", "rocm")
+def test_rfactor_predicates(target, ctx):
+    n = te.reduce_axis((0, 129), 'n')
+    A = te.placeholder((129,), name='A')
+    B = te.compute( (1, ), lambda b:
+                     te.sum(A[n],
+                             axis=n),
+                     name='B'
+    )
 
-        s = te.create_schedule(B.op)
-
-        _, ni = s[B].split(s[B].op.reduce_axis[0], factor=8)
+    s = te.create_schedule(B.op)
 
-        BF = s.rfactor(B, ni, 0)
-        s[B].set_store_predicate(tx.var.equal(0))
+    _, ni = s[B].split(s[B].op.reduce_axis[0], factor=8)
 
-        s[B].bind(s[B].op.reduce_axis[0], tx)
-        s[B].bind(s[B].op.axis[0], bx)
+    BF = s.rfactor(B, ni, 0)
+    s[B].set_store_predicate(tx.var.equal(0))
 
-        s[BF].compute_at(s[B], s[B].op.axis[0])
+    s[B].bind(s[B].op.reduce_axis[0], tx)
+    s[B].bind(s[B].op.axis[0], bx)
 
-        _, noi = s[BF].split(s[BF].op.reduce_axis[0], factor=2)
+    s[BF].compute_at(s[B], s[B].op.axis[0])
 
-        BF2 = s.rfactor(BF, noi, 0)
+    _, noi = s[BF].split(s[BF].op.reduce_axis[0], factor=2)
 
-        s[BF].bind(s[BF].op.axis[0], tx)
-        s[BF2].compute_at(s[BF], s[BF].op.axis[1])
+    BF2 = s.rfactor(BF, noi, 0)
 
-        fcuda = tvm.build(s, [A, B], device)
+    s[BF].bind(s[BF].op.axis[0], tx)
+    s[BF2].compute_at(s[BF], s[BF].op.axis[1])
 
-    check("cuda")
-    check("rocm")
+    fcuda = tvm.build(s, [A, B], target)
 
-@unittest.skipIf(not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"), "skip because cuda is not enabled..")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_const_float_to_half():
     # This import is required to use nvcc to perform code gen;
     # otherwise it is found that the code gen is done by nvrtc.
@@ -398,16 +367,14 @@ def test_cuda_const_float_to_half():
     func(a, c)
     np.testing.assert_equal(c.asnumpy(), a_np > b.value)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_reduction():
     def check(device, dtype, m=32, n=32):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist or not tvm.runtime.enabled(device):
-            print("skip because", device, "is not enabled..")
+        if not tvm.testing.device_enabled(device):
+            print("Skipping", device)
             return
-        if dtype == "float16" and not have_fp16(ctx.compute_version):
-            print("Skip because gpu does not have fp16 support")
-            return
-
+        ctx = tvm.context(device, 0)
         a = te.placeholder((m, n), name="a", dtype=dtype)
         b = te.placeholder((m, n), name="b", dtype=dtype)
         c = a + b
@@ -430,12 +397,14 @@ def test_cuda_reduction():
     check("rocm", "float32")
     check("cuda", "float16")
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_mix_threaded_and_normal_reduction():
     def check(device, dtype, m=32, n=32):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist or not tvm.runtime.enabled(device):
-            print("skip because", device, "is not enabled..")
+        if not tvm.testing.device_enabled(device):
+            print("Skipping", device)
             return
+        ctx = tvm.context(device, 0)
         if dtype == "float16" and not have_fp16(ctx.compute_version):
             print("Skip because gpu does not have fp16 support")
             return
@@ -458,11 +427,9 @@ def test_cuda_mix_threaded_and_normal_reduction():
     check("rocm", "float32")
     check("cuda", "float16")
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_floordiv_with_vectorization():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-
     with tvm.target.cuda():
         # B[i] = A[floordiv(i, k)]
         n = 256
@@ -485,11 +452,9 @@ def test_cuda_floordiv_with_vectorization():
         func(a_nd, b_nd)
         tvm.testing.assert_allclose(b_nd.asnumpy(), b_np, rtol=1e-3)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_floormod_with_vectorization():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-
     with tvm.target.cuda():
         # B[i] = A[floormod(i, k)]
         n = 256
@@ -512,11 +477,9 @@ def test_cuda_floormod_with_vectorization():
         func(a_nd, b_nd)
         tvm.testing.assert_allclose(b_nd.asnumpy(), b_np, rtol=1e-3)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_vectorized_casts():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-
     def check(t0, t1):
         if (t0 ==  "float16" or t1 == "float16") and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because gpu does not have fp16 support")
@@ -571,6 +534,8 @@ def sched(B):
     s[B].bind(iio, tx)
     return s
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_vectorized_intrin1():
     test_funcs = [
         (tvm.tir.floor, lambda x : np.floor(x)),
@@ -594,9 +559,6 @@ def test_vectorized_intrin1():
         (tvm.tir.sqrt,  lambda x : np.sqrt(x)),
     ]
     def run_test(tvm_intrin, np_func, dtype):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because gpu does not have fp16 support")
             return
@@ -627,6 +589,8 @@ def test_vectorized_intrin1():
         run_test(*func, "float32")
         run_test(*func, "float16")
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_vectorized_intrin2(dtype="float32"):
     c2 = tvm.tir.const(2, dtype=dtype)
     test_funcs = [
@@ -634,10 +598,6 @@ def test_vectorized_intrin2(dtype="float32"):
         (tvm.tir.fmod,  lambda x : np.fmod(x, 2.0))
     ]
     def run_test(tvm_intrin, np_func):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
-
         n = 128
         A = te.placeholder((n,), dtype=dtype, name='A')
         B = te.compute((n,), lambda i: tvm_intrin(A[i], c2), name='B')
@@ -652,6 +612,8 @@ def test_vectorized_intrin2(dtype="float32"):
     for func in test_funcs:
         run_test(*func)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_vectorized_popcount():
     def ref_popcount(x):
         cnt = 0
@@ -661,10 +623,6 @@ def test_vectorized_popcount():
         return cnt
 
     def run_test(dtype):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
-
         n = 128
         A = te.placeholder((n,), dtype=dtype, name='A')
         B = te.compute((n,), lambda i: tvm.tir.popcount(A[i]), name='B')
@@ -680,11 +638,10 @@ def test_vectorized_popcount():
     run_test("uint32")
     run_test("uint64")
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_cuda_vectorize_load_permute_pad():
     def check_cuda(dtype, n, l, padding, lanes):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because gpu does not have fp16 support")
             return
@@ -755,23 +712,21 @@ def vcf_check_common(s, args):
 
     tvm.tir.stmt_functor.ir_transform(stmt['main'].body, pre_visit, post_visit)
 
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("CUDA device not found, skip the verification.")
-        return
-    else:
-        tgt = tvm.target.cuda()
-        mod = tvm.build(s, args, tgt)
-        # To check if every vectorize loop transforms to correct instruction
-        # print(mod.imported_modules[0].get_source())
-
-        ctx = tvm.context("cuda", 0)
-        a = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
-        b = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
-        c = tvm.nd.array(np.zeros((512, 512), dtype="float32"), ctx)
-        mod(a, b, c)
-        tvm.testing.assert_allclose(c.asnumpy(), np.dot(
-            a.asnumpy(), b.asnumpy()), rtol=1e-5)
-
+    tgt = tvm.target.cuda()
+    mod = tvm.build(s, args, tgt)
+    # To check if every vectorize loop transforms to correct instruction
+    # print(mod.imported_modules[0].get_source())
+
+    ctx = tvm.context("cuda", 0)
+    a = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
+    b = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
+    c = tvm.nd.array(np.zeros((512, 512), dtype="float32"), ctx)
+    mod(a, b, c)
+    tvm.testing.assert_allclose(c.asnumpy(), np.dot(
+        a.asnumpy(), b.asnumpy()), rtol=1e-5)
+
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_vectorized_cooperative_fetching_x():
     N = 512
     A = te.placeholder((N, N), name='A', dtype='float32')
@@ -821,6 +776,8 @@ def test_vectorized_cooperative_fetching_x():
 
     vcf_check_common(s, [A, B, C])
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_vectorized_cooperative_fetching_xy():
     N = 512
     A = te.placeholder((N, N), name='A')
@@ -874,11 +831,9 @@ def test_vectorized_cooperative_fetching_xy():
 
     vcf_check_common(s, [A, B, C])
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_unrolled_vectorization():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-
     dtype = 'float32'
     target = 'cuda'
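
In the CUDA codegen tests above, the per-device check() closures become pytest parameters: the decorated test receives the target string plus a matching context for each requested target, and parameters for targets that are not enabled are expected to be skipped. A minimal sketch of that signature, with a hypothetical row-sum kernel modeled on the cross-thread reductions above:

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import te

    @tvm.testing.parametrize_targets("cuda", "rocm")
    def test_row_sum(target, ctx):
        n, m = 4, 32
        A = te.placeholder((n, m), name="A")
        k = te.reduce_axis((0, m), "k")
        B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")

        # cross-thread reduction: reduce axis on threadIdx.x, rows on blockIdx.x
        s = te.create_schedule(B.op)
        ko, _ = s[B].split(B.op.reduce_axis[0], nparts=16)
        s[B].bind(ko, te.thread_axis("threadIdx.x"))
        s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
        f = tvm.build(s, [A, B], target)

        a = tvm.nd.array(np.random.uniform(size=(n, m)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), np.sum(a.asnumpy(), axis=1), rtol=1e-3)
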
 
index ddb35f3..3289e38 100644 (file)
@@ -18,7 +18,9 @@ import tvm
 from tvm import te
 from tvm.contrib import util
 import numpy as np
+import tvm.testing
 
+@tvm.testing.requires_gpu
 def test_large_uint_imm():
     value =  (1 << 63) + 123
     other = tvm.tir.const(3, "uint64")
@@ -32,9 +34,9 @@ def test_large_uint_imm():
     s[A].bind(xo, te.thread_axis("blockIdx.x"))
 
     def check_target(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             return
+        ctx = tvm.context(device, 0)
         f = tvm.build(s, [A], device)
         # launch the kernel.
         a = tvm.nd.empty((n, ), dtype=A.dtype, ctx=ctx)
@@ -45,6 +47,7 @@ def test_large_uint_imm():
     check_target("vulkan")
 
 
+@tvm.testing.requires_gpu
 def test_add_pipeline():
     n = te.size_var('n')
     A = te.placeholder((n,), name='A')
@@ -64,11 +67,9 @@ def test_add_pipeline():
     s[D].bind(xo, te.thread_axis("blockIdx.x"))
 
     def check_target(device, host="stackvm"):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            return
-        if not tvm.runtime.enabled(host):
+        if not tvm.testing.device_enabled(device) or not tvm.testing.device_enabled(host):
             return
+        ctx = tvm.context(device, 0)
         mhost = tvm.driver.build(s, [A, B, D], target=device, target_host=host)
         f = mhost.entry_func
         # launch the kernel.
index 4104af8..ef98816 100644 (file)
@@ -17,7 +17,9 @@
 import tvm
 from tvm import te
 import numpy as np
+import tvm.testing
 
+@tvm.testing.uses_gpu
 def test_add_pipeline():
     nn = 64
     max_threads = 4
@@ -51,7 +53,7 @@ def test_add_pipeline():
     print(tvm.lower(s_gpu, [A, C_gpu], simple_mode=True))
 
     def check_target(target):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             return
         s = s_gpu if target in ['opencl', 'cuda'] else s_cpu
         C = C_gpu if target in ['opencl', 'cuda'] else C_cpu
@@ -86,7 +88,7 @@ def test_pack_buffer_simple():
 
 
     def check_target(target):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             return
         # build and invoke the kernel.
         f = tvm.build(s, [A, C], target)
@@ -116,7 +118,7 @@ def test_pack_buffer_intermediate():
     s = te.create_schedule(C.op)
 
     def check_target(target):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             return
         # build and invoke the kernel.
         f = tvm.build(s, [A, C], target)
index d690364..fd7a764 100644 (file)
@@ -24,6 +24,7 @@ import math
 import re
 
 
+@tvm.testing.requires_llvm
 def test_llvm_intrin():
     ib = tvm.tir.ir_builder.create()
     n = tvm.runtime.convert(4)
@@ -44,6 +45,7 @@ def test_llvm_intrin():
     fcode = tvm.build(mod, None, "llvm")
 
 
+@tvm.testing.requires_llvm
 def test_llvm_void_intrin():
     ib = tvm.tir.ir_builder.create()
     A = ib.pointer("uint8", name="A")
@@ -56,6 +58,7 @@ def test_llvm_void_intrin():
     fcode = tvm.build(mod, None, "llvm")
 
 
+@tvm.testing.requires_llvm
 def test_llvm_overloaded_intrin():
     # Name lookup for overloaded intrinsics in LLVM 4- requires a name
     # that includes the overloaded types.
@@ -80,6 +83,7 @@ def test_llvm_overloaded_intrin():
     f = tvm.build(s, [A, C], target = 'llvm')
 
 
+@tvm.testing.requires_llvm
 def test_llvm_import():
     # extern "C" is necessary to get the correct signature
     cc_code = """
@@ -93,8 +97,6 @@ def test_llvm_import():
                     tvm.tir.call_pure_extern("float32", "my_add", A(*i), 1.0),
                     name='B')
     def check_llvm(use_file):
-        if not tvm.runtime.enabled("llvm"):
-            return
         if not clang.find_clang(required=False):
             print("skip because clang is not available")
             return
@@ -120,6 +122,7 @@ def test_llvm_import():
 
 
 
+@tvm.testing.requires_llvm
 def test_llvm_lookup_intrin():
     ib = tvm.tir.ir_builder.create()
     A = ib.pointer("uint8x8", name="A")
@@ -132,6 +135,7 @@ def test_llvm_lookup_intrin():
     fcode = tvm.build(mod, None, "llvm")
 
 
+@tvm.testing.requires_llvm
 def test_llvm_large_uintimm():
     value =  (1 << 63) + 123
     other = tvm.tir.const(3, "uint64")
@@ -139,8 +143,6 @@ def test_llvm_large_uintimm():
     s = te.create_schedule(A.op)
 
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
-            return
         f = tvm.build(s, [A], "llvm")
         ctx = tvm.cpu(0)
         # launch the kernel.
@@ -151,6 +153,7 @@ def test_llvm_large_uintimm():
     check_llvm()
 
 
+@tvm.testing.requires_llvm
 def test_llvm_add_pipeline():
     nn = 1024
     n = tvm.runtime.convert(nn)
@@ -170,8 +173,6 @@ def test_llvm_add_pipeline():
     s[C].vectorize(xi)
 
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
-            return
         # Specifically allow offset to test codepath when offset is available
         Ab = tvm.tir.decl_buffer(
             A.shape, A.dtype,
@@ -194,6 +195,7 @@ def test_llvm_add_pipeline():
     check_llvm()
 
 
+@tvm.testing.requires_llvm
 def test_llvm_persist_parallel():
     n = 128
     A = te.placeholder((n,), name='A')
@@ -210,8 +212,6 @@ def test_llvm_persist_parallel():
     s[C].pragma(xi, "parallel_stride_pattern")
 
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
-            return
         # BUILD and invoke the kernel.
         f = tvm.build(s, [A, C], "llvm")
         ctx = tvm.cpu(0)
@@ -226,10 +226,9 @@ def test_llvm_persist_parallel():
     check_llvm()
 
 
+@tvm.testing.requires_llvm
 def test_llvm_flip_pipeline():
     def check_llvm(nn, base):
-        if not tvm.runtime.enabled("llvm"):
-            return
         n = tvm.runtime.convert(nn)
         A = te.placeholder((n + base), name='A')
         C = te.compute((n,), lambda i: A(nn + base- i - 1), name='C')
@@ -253,10 +252,9 @@ def test_llvm_flip_pipeline():
     check_llvm(128, 1)
 
 
+@tvm.testing.requires_llvm
 def test_llvm_vadd_pipeline():
     def check_llvm(n, lanes):
-        if not tvm.runtime.enabled("llvm"):
-            return
         A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes)
         B = te.compute((n,), lambda i: A[i], name='B')
         C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C')
@@ -282,10 +280,9 @@ def test_llvm_vadd_pipeline():
     check_llvm(512, 2)
 
 
+@tvm.testing.requires_llvm
 def test_llvm_madd_pipeline():
     def check_llvm(nn, base, stride):
-        if not tvm.runtime.enabled("llvm"):
-            return
         n = tvm.runtime.convert(nn)
         A = te.placeholder((n + base, stride), name='A')
         C = te.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C')
@@ -310,6 +307,7 @@ def test_llvm_madd_pipeline():
         check_llvm(4, 0, 3)
 
 
+@tvm.testing.requires_llvm
 def test_llvm_temp_space():
     nn = 1024
     n = tvm.runtime.convert(nn)
@@ -319,8 +317,6 @@ def test_llvm_temp_space():
     s = te.create_schedule(C.op)
 
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
-            return
         # build and invoke the kernel.
         f = tvm.build(s, [A, C], "llvm")
         ctx = tvm.cpu(0)
@@ -333,6 +329,7 @@ def test_llvm_temp_space():
             c.asnumpy(), a.asnumpy() + 1 + 1)
     check_llvm()
 
+@tvm.testing.requires_llvm
 def test_multiple_func():
     nn = 1024
     n = tvm.runtime.convert(nn)
@@ -344,8 +341,6 @@ def test_multiple_func():
     s[C].parallel(xo)
     s[C].vectorize(xi)
     def check_llvm():
-        if not tvm.runtime.enabled("llvm"):
-            return
         # build two functions
         f2 = tvm.lower(s, [A, B, C], name="fadd1")
         f1 = tvm.lower(s, [A, B, C], name="fadd2")
@@ -369,10 +364,9 @@ def test_multiple_func():
 
 
 
+@tvm.testing.requires_llvm
 def test_llvm_condition():
     def check_llvm(n, offset):
-        if not tvm.runtime.enabled("llvm"):
-            return
         A = te.placeholder((n, ), name='A')
         C = te.compute((n,), lambda i: tvm.tir.if_then_else(i >= offset, A[i], 0.0), name='C')
         s = te.create_schedule(C.op)
@@ -389,10 +383,9 @@ def test_llvm_condition():
     check_llvm(64, 8)
 
 
+@tvm.testing.requires_llvm
 def test_llvm_bool():
     def check_llvm(n):
-        if not tvm.runtime.enabled("llvm"):
-            return
         A = te.placeholder((n, ), name='A', dtype="int32")
         C = te.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C')
         s = te.create_schedule(C.op)
@@ -408,10 +401,9 @@ def test_llvm_bool():
     check_llvm(64)
 
 
+@tvm.testing.requires_llvm
 def test_rank_zero():
     def check_llvm(n):
-        if not tvm.runtime.enabled("llvm"):
-            return
         A = te.placeholder((n, ), name='A')
         scale = te.placeholder((), name='scale')
         k = te.reduce_axis((0, n), name="k")
@@ -431,10 +423,9 @@ def test_rank_zero():
         tvm.testing.assert_allclose(d.asnumpy(), d_np)
     check_llvm(64)
 
+@tvm.testing.requires_llvm
 def test_rank_zero_bound_checkers():
     def check_llvm(n):
-        if not tvm.runtime.enabled("llvm"):
-            return
         with tvm.transform.PassContext(config={"tir.instrument_bound_checkers": True}):
             A = te.placeholder((n, ), name='A')
             scale = te.placeholder((), name='scale')
@@ -456,6 +447,7 @@ def test_rank_zero_bound_checkers():
     check_llvm(64)
 
 
+@tvm.testing.requires_llvm
 def test_alignment():
     n = tvm.runtime.convert(1024)
     A = te.placeholder((n,), name='A')
@@ -496,6 +488,7 @@ def test_alignment():
     assert has_call_to_assume()
 
 
+@tvm.testing.requires_llvm
 def test_llvm_div():
     """Check that the semantics of div and mod is correct"""
     def check(start, end, dstart, dend, dtype, floor_div=False):
@@ -595,6 +588,7 @@ def test_llvm_div():
         check(0, 255, dstart, dend, 'uint8', floor_div=False)
         check(0, 255, dstart, dend, 'uint8', floor_div=True)
 
+@tvm.testing.requires_llvm
 def test_llvm_fp_math():
     def check_llvm_reciprocal(n):
         A = te.placeholder((n,), name='A')
@@ -629,6 +623,7 @@ def test_llvm_fp_math():
     check_llvm_sigmoid(16)
 
 
+@tvm.testing.requires_llvm
 def test_dwarf_debug_information():
     nn = 1024
     n = tvm.runtime.convert(nn)
@@ -640,8 +635,6 @@ def test_dwarf_debug_information():
     s[C].parallel(xo)
     s[C].vectorize(xi)
     def check_llvm_object():
-        if not tvm.runtime.enabled("llvm"):
-            return
         if tvm.target.codegen.llvm_version_major() < 5:
             return
         if tvm.target.codegen.llvm_version_major() > 6:
@@ -676,8 +669,6 @@ def test_dwarf_debug_information():
             assert re.search(r"""DW_AT_name.*fadd2""", str(output))
 
     def check_llvm_ir():
-        if not tvm.runtime.enabled("llvm"):
-            return
         if tvm.target.codegen.llvm_version_major() < 5:
             return
         if tvm.target.codegen.llvm_version_major() > 6:
@@ -704,6 +695,7 @@ def test_dwarf_debug_information():
     check_llvm_ir()
 
 
+@tvm.testing.requires_llvm
 def test_llvm_shuffle():
     a = te.placeholder((8, ), 'int32')
     b = te.placeholder((8, ), 'int32')
@@ -760,6 +752,7 @@ def np_bf16_cast_and_cast_back(arr):
     ''' Convert a numpy array of float to bf16 and cast back'''
     return np_bf162np_float(np_float2np_bf16(arr))
 
+@tvm.testing.requires_llvm
 def test_llvm_bf16():
     def dotest(do_vectorize):
         np.random.seed(122)
@@ -784,6 +777,7 @@ def test_llvm_bf16():
     dotest(True)
     dotest(False)
 
+@tvm.testing.requires_llvm
 def test_llvm_crt_static_lib():
     A = te.placeholder((32, ), dtype='bfloat16')
     B = te.placeholder((32, ), dtype='bfloat16')
index e403589..9a03a79 100644 (file)
 # under the License.
 import tvm
 from tvm import te
+import tvm.testing
 
 target = 'opencl'
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_opencl
 def test_opencl_ternary_expression():
     def check_if_then_else(ctx, n, dtype):
         A = te.placeholder((n,), name='A', dtype=dtype)
@@ -52,10 +55,6 @@ def test_opencl_ternary_expression():
         # Only need to test compiling here
         fun(a, c)
 
-    if not tvm.runtime.enabled(target):
-        print("skip because opencl is not enabled..")
-        return
-
     ctx = tvm.context(target, 0)
 
     check_if_then_else(ctx, 1, 'int8')
@@ -67,6 +66,8 @@ def test_opencl_ternary_expression():
     check_select(ctx, 1, 'int16')
     check_select(ctx, 1, 'uint16')
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_opencl
 def test_opencl_inf_nan():
     def check_inf_nan(ctx, n, value, dtype):
         A = te.placeholder((n,), name='A', dtype=dtype)
@@ -80,10 +81,6 @@ def test_opencl_inf_nan():
         # Only need to test compiling here
         fun(a, c)
 
-    if not tvm.runtime.enabled(target):
-        print("skip because opencl is not enabled..")
-        return
-
     ctx = tvm.context(target, 0)
 
     check_inf_nan(ctx, 1, -float('inf'), 'float32')
@@ -94,6 +91,8 @@ def test_opencl_inf_nan():
     check_inf_nan(ctx, 1, float('nan'), 'float64')
 
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_opencl
 def test_opencl_max():
     def check_max(ctx, n, dtype):
         A = te.placeholder((n,), name='A', dtype=dtype)
@@ -109,10 +108,6 @@ def test_opencl_max():
         # Only need to test compiling here
         fun(a, c)
 
-    if not tvm.runtime.enabled(target):
-        print("skip because opencl is not enabled..")
-        return
-
     ctx = tvm.context(target, 0)
 
     check_max(ctx, 1, 'int8')
index 4c6304a..2adc1c8 100644 (file)
@@ -24,7 +24,7 @@ ty = te.thread_axis("threadIdx.y")
 bx = te.thread_axis("blockIdx.x")
 by = te.thread_axis("blockIdx.y")
 
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
 def test_rocm_cross_thread_reduction():
     # based on the reduction tutorial
     n = te.size_var("n")
@@ -52,7 +52,7 @@ def test_rocm_cross_thread_reduction():
       b.asnumpy(),  np.sum(a.asnumpy(), axis=1), rtol=1e-4)
 
 
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
 def test_rocm_inf_nan():
     def check_inf_nan(ctx, n, value, dtype):
         A = te.placeholder((n,), name='A', dtype=dtype)
@@ -75,7 +75,7 @@ def test_rocm_inf_nan():
     check_inf_nan(ctx, 1, float('nan'), 'float32')
     check_inf_nan(ctx, 1, float('nan'), 'float64')
 
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
 def test_rocm_reduction_binding():
     k = te.reduce_axis((0, 32), 'k')
     A = te.placeholder((96, 32), name='A')
@@ -89,7 +89,7 @@ def test_rocm_reduction_binding():
     mo, _ = s[B].split(B.op.axis[0], 32)
     s[B].bind(mo, bx)
 
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
 def test_rocm_copy():
 
     def check_rocm(dtype, n):
@@ -107,7 +107,7 @@ def test_rocm_copy():
         peturb = np.random.uniform(low=0.5, high=1.5)
         check_rocm(dtype, int(peturb * (2 ** logN)))
 
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
 def test_rocm_vectorize_add():
     num_thread = 8
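
For the ROCm tests, the repeated unittest.skipIf guards collapse into the single requires_rocm decorator. A minimal sketch with a hypothetical host/device round-trip, loosely modeled on test_rocm_copy above:

    import numpy as np
    import tvm
    import tvm.testing

    @tvm.testing.requires_rocm
    def test_rocm_roundtrip():
        ctx = tvm.rocm(0)
        x = np.random.uniform(size=(1024,)).astype("float32")
        y = tvm.nd.array(x, ctx)                     # copy host -> device
        tvm.testing.assert_allclose(y.asnumpy(), x)  # copy device -> host and compare
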
 
index e03d689..55c7c31 100644 (file)
 # specific language governing permissions and limitations
 # under the License.
 import tvm
+import tvm.testing
 from tvm import te
 import numpy as np
 
 def run_jit(fapi, check):
     for target in ["llvm", "stackvm"]:
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             continue
         f = tvm.driver.build(fapi, target=target)
         s = f.get_source()
index 722a9ec..a036cd8 100644 (file)
@@ -20,11 +20,8 @@ import re
 import numpy as np
 
 
+@tvm.testing.requires_vulkan
 def test_vector_comparison():
-    if not tvm.runtime.enabled("vulkan"):
-        print("Skipping due to no Vulkan module")
-        return
-
     target = 'vulkan'
 
     def check_correct_assembly(dtype):
@@ -60,12 +57,10 @@ tx = te.thread_axis("threadIdx.x")
 bx = te.thread_axis("blockIdx.x")
 
 
+@tvm.testing.requires_vulkan
 def test_vulkan_copy():
 
     def check_vulkan(dtype, n):
-        if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
-            print("skip because vulkan is not enabled..")
-            return
         A = te.placeholder((n,), name='A', dtype=dtype)
         ctx = tvm.vulkan(0)
         a_np = np.random.uniform(size=(n,)).astype(A.dtype)
@@ -81,13 +76,11 @@ def test_vulkan_copy():
         check_vulkan(dtype, int(peturb * (2 ** logN)))
 
 
+@tvm.testing.requires_vulkan
 def test_vulkan_vectorize_add():
     num_thread = 8
 
     def check_vulkan(dtype, n, lanes):
-        if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
-            print("skip because vulkan is not enabled..")
-            return
         A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
         B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B')
         s = te.create_schedule(B.op)
@@ -106,6 +99,7 @@ def test_vulkan_vectorize_add():
     check_vulkan("float16", 64, 2)
 
 
+@tvm.testing.requires_vulkan
 def test_vulkan_stress():
     """
     Launch a randomized test with multiple kernels per stream, multiple uses of
@@ -118,9 +112,6 @@ def test_vulkan_stress():
 
     def run_stress():
         def worker():
-            if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
-                print("skip because vulkan is not enabled..")
-                return
             A = te.placeholder((n,), name='A', dtype="float32")
             B = te.placeholder((n,), name='B', dtype="float32")
             functions = [
index 5b4a309..5bebf3d 100644 (file)
@@ -17,7 +17,7 @@
 
 import tvm
 from tvm import te
-from tvm.testing import check_numerical_grads, assert_allclose
+from tvm.testing import assert_allclose
 from tvm import topi
 from tvm.topi.util import get_const_tuple
 import pytest
@@ -30,10 +30,7 @@ def check_grad(out, inputs, args=[], data_range=(-10, 10), desired_grads=None, a
 
     def check_device(device, host="llvm"):
         ctx = tvm.context(device, 0)
-        if not tvm.runtime.enabled(host):
-            return
-        if not ctx.exist:
-            print("skip because %s is not enabled.." % device)
+        if not tvm.testing.device_enabled(host):
             return
 
         sout = te.create_schedule(out.op)
@@ -74,7 +71,7 @@ def check_grad(out, inputs, args=[], data_range=(-10, 10), desired_grads=None, a
                 out_data = tvm.nd.empty(out_shape, out.dtype)
                 mout(out_data, *[tvm.nd.array(d) for d in list(in_data)])
                 return out_data.asnumpy().sum()
-            check_numerical_grads(forward, [d.asnumpy() for d in input_data + arg_vals], g_res)
+            tvm.testing.check_numerical_grads(forward, [d.asnumpy() for d in input_data + arg_vals], g_res)
 
     check_device("cpu")
 
index 8ab65f1..6640420 100644 (file)
@@ -21,6 +21,8 @@ from tvm.contrib import util
 from tvm.te.hybrid import script
 from tvm.te.hybrid.runtime import HYBRID_GLOBALS
 
+import tvm.testing
+
 @pytest.mark.skip
 def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None):
     def tvm_val_2_py_val(val):
@@ -316,11 +318,9 @@ def test_if():
     run_and_check(func, ins, outs=outs)
 
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_bind():
-    if not tvm.gpu(0).exist:
-        print('[Warning] No GPU found! Skip bind test!')
-        return
-
     @script
     def vec_add(a, b):
         c = output_tensor((1000, ), 'float32')
@@ -463,6 +463,8 @@ def test_non_zero():
     func, ins, outs = run_and_check(triangle, [a, b])
     run_and_check(func, ins, outs=outs)
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_allocate():
     @te.hybrid.script
     def blur2d(a):
@@ -482,27 +484,24 @@ def test_allocate():
     func, ins, outs = run_and_check(blur2d, [a])
     run_and_check(func, ins, outs=outs)
 
-    if tvm.gpu().exist:
-        @te.hybrid.script
-        def share_vec_add(a, b):
-            c = output_tensor((256, ), 'float32')
-            shared = allocate((256, ), 'float32', 'shared')
-            for i in bind("threadIdx.x", 256):
-                shared[i] = a[i]
-            local = allocate((256, ), 'float32', 'local')
-            for i in bind("threadIdx.x", 256):
-                local[i] = b[i]
-            for i in bind("threadIdx.x", 256):
-                c[i] = shared[i] + local[i]
-            return c
-
-        a = te.placeholder((256, ), dtype='float32', name='a')
-        b = te.placeholder((256, ), dtype='float32', name='b')
-        c = share_vec_add(a, b)
-        func, ins, outs = run_and_check(share_vec_add, [a, b], target='cuda')
-        run_and_check(func, ins, outs=outs, target='cuda')
-    else:
-        print('[Warning] No GPU found! Skip shared mem test!')
+    @te.hybrid.script
+    def share_vec_add(a, b):
+        c = output_tensor((256, ), 'float32')
+        shared = allocate((256, ), 'float32', 'shared')
+        for i in bind("threadIdx.x", 256):
+            shared[i] = a[i]
+        local = allocate((256, ), 'float32', 'local')
+        for i in bind("threadIdx.x", 256):
+            local[i] = b[i]
+        for i in bind("threadIdx.x", 256):
+            c[i] = shared[i] + local[i]
+        return c
+
+    a = te.placeholder((256, ), dtype='float32', name='a')
+    b = te.placeholder((256, ), dtype='float32', name='b')
+    c = share_vec_add(a, b)
+    func, ins, outs = run_and_check(share_vec_add, [a, b], target='cuda')
+    run_and_check(func, ins, outs=outs, target='cuda')
 
 def test_upstream():
     @te.hybrid.script
index 1f17914..a57a340 100644 (file)
@@ -18,7 +18,7 @@ import tvm
 from tvm import te
 from tvm import topi
 import numpy as np
-from tvm.contrib import nvcc
+import tvm.testing
 
 def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96):
     A = te.placeholder((n, l), name='A', dtype='float16')
@@ -204,26 +204,14 @@ def tensor_core_batch_matmul(warp_tile_m=16, m=64, n=32, l=96, batch=2):
       c_np[bs, :, :] = np.dot(a_np[bs, :, :], b_np[bs, :, :])
     np.testing.assert_allclose(c_np, c.asnumpy(), rtol=1e-3)
 
+@tvm.testing.requires_tensorcore
 def test_tensor_core_matmul():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-    if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
-        print("skip because gpu does not support tensor core")
-        return
-
     tensor_core_matmul(16) #test with warp_tile 16x16x16
     tensor_core_matmul(8) #test with warp_tile 8x32x16
     tensor_core_matmul(32) #test with warp_tile 32x8x16
 
+@tvm.testing.requires_tensorcore
 def test_tensor_core_batch_matmul():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-    if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
-        print("skip because gpu does not support tensor core")
-        return
-
     tensor_core_batch_matmul()
 
 if __name__ == '__main__':
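
Here the two-part guard (CUDA enabled plus nvcc.have_tensorcore) is replaced by requires_tensorcore. A minimal sketch that only sanity-checks the capability the decorator is assumed to guarantee; the test name is hypothetical:

    import tvm
    import tvm.testing
    from tvm.contrib import nvcc

    @tvm.testing.requires_tensorcore
    def test_tensorcore_capability():
        # the decorator is expected to gate on a tensor-core capable GPU,
        # so this capability query should succeed
        assert nvcc.have_tensorcore(tvm.gpu(0).compute_version)
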
index aa87665..8b70c82 100644 (file)
@@ -18,7 +18,7 @@ import tvm
 from tvm import te
 import numpy as np
 from tvm.topi.testing import conv2d_nhwc_python
-from tvm.contrib import nvcc
+import tvm.testing
 
 VERIFY = True
 
@@ -103,14 +103,8 @@ def intrin_wmma_store_matrix(shape):
     return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
 
 
+@tvm.testing.requires_tensorcore
 def test_tensor_core_batch_matmal():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-    if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
-        print("skip because gpu does not support tensor core")
-        return
-
     batch_size = 4
     n = 512
     m, l = n, n
@@ -216,14 +210,8 @@ def test_tensor_core_batch_matmal():
 
 
 
+@tvm.testing.requires_tensorcore
 def test_tensor_core_batch_conv():
-    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-        print("skip because cuda is not enabled..")
-        return
-    if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
-        print("skip because gpu does not support tensor core")
-        return
-
     # The sizes of inputs and filters
     batch_size = 32
     height = 14
index 9714368..4b11c00 100644 (file)
@@ -20,6 +20,7 @@ from tvm import te
 from tvm import topi
 import tvm.topi.testing
 from tvm.topi.util import get_const_tuple
+import tvm.testing
 
 
 def test_operator_type_and_tags():
@@ -103,10 +104,10 @@ def verify_tensor_scalar_bop(shape, typ="add"):
         raise NotImplementedError()
 
     def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
+        ctx = tvm.context(device, 0)
         print("Running on target: %s" % device)
         with tvm.target.create(device):
             s = tvm.topi.testing.get_elemwise_schedule(device)(B)
@@ -150,7 +151,7 @@ def verify_broadcast_bop(lhs_shape, rhs_shape, typ="add"):
 
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -183,10 +184,11 @@ def verify_broadcast_bop(lhs_shape, rhs_shape, typ="add"):
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def verify_conv2d_scalar_bop(batch, in_size, in_channel, num_filter, kernel, stride, padding, typ="add"):
     def check_device(device):
         ctx = tvm.context(device, 0)
-        if not ctx.exist:
+        if not tvm.testing.device_enabled(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
@@ -239,6 +241,7 @@ def verify_conv2d_scalar_bop(batch, in_size, in_channel, num_filter, kernel, str
         check_device(device)
 
 
+@tvm.testing.uses_gpu
 def test_tensor_scalar_bop():
     verify_tensor_scalar_bop((1,), typ="add")
     verify_tensor_scalar_bop((3, 5), typ="sub")
@@ -246,6 +249,7 @@ def test_tensor_scalar_bop():
     verify_tensor_scalar_bop((2, 3, 1, 32), typ="div")
 
 
+@tvm.testing.uses_gpu
 def test_broadcast_bop():
     verify_broadcast_bop((2, 3), (), typ="add")
     verify_broadcast_bop((5, 2, 3), (1,), typ="add")
@@ -254,6 +258,7 @@ def test_broadcast_bop():
     verify_broadcast_bop((2, 3, 1, 32), (64, 32), typ="div")
 
 
+@tvm.testing.uses_gpu
 def test_conv2d_scalar_bop():
     verify_conv2d_scalar_bop(1, 16, 4, 4, 3, 1, 1, typ="add")
     verify_conv2d_scalar_bop(1, 32, 2, 1, 3, 1, 1, typ="sub")
index ea16801..c7be325 100644 (file)
@@ -17,7 +17,7 @@
 import numpy as np
 import tvm
 from tvm import te
-from tvm.testing import check_numerical_grads
+import tvm.testing
 
 def test_check_numerical_grads():
     # Functions and their derivatives
@@ -46,7 +46,7 @@ def test_check_numerical_grads():
         func_forw = lambda x: np.sum(func(x)[0])
         grads = [func(x_input)[1]]
 
-        check_numerical_grads(func_forw, [x_input], grads)
+        tvm.testing.check_numerical_grads(func_forw, [x_input], grads)
 
     # Check functions with multiple arguments
     for f1 in functions:
@@ -57,13 +57,13 @@ def test_check_numerical_grads():
             func_forw = lambda x, y: np.sum(f1(x)[0] + f2(y)[0])
             grads = [f1(x_input)[1], f2(y_input)[1]]
 
-            check_numerical_grads(func_forw, [x_input, y_input], grads)
+            tvm.testing.check_numerical_grads(func_forw, [x_input, y_input], grads)
 
             # Same thing but with keyword arguments
             func_forw = lambda x, y: np.sum(f1(x)[0] + f2(y)[0])
             grads = {'x': f1(x_input)[1], 'y': f2(y_input)[1]}
 
-            check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
+            tvm.testing.check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
 
     def _noise1(x, atol=1e-2, rtol=0.1):
         # We go in random direction using twice the original tolerance to be sure this
@@ -93,23 +93,23 @@ def test_check_numerical_grads():
             grads = [_noise1(f1(x_input)[1]), _noise1(f2(y_input)[1])]
 
             try:
-                check_numerical_grads(func_forw, [x_input, y_input], grads)
+                tvm.testing.check_numerical_grads(func_forw, [x_input, y_input], grads)
             except AssertionError as e:
                 pass
             else:
-                raise AssertionError("check_numerical_grads didn't raise an exception")
+                raise AssertionError("tvm.testing.check_numerical_grads didn't raise an exception")
 
             func_forw = lambda x, y: np.sum(f1(x)[0] + f2(y)[0])
             grads = {'x': _noise2(f1(x_input)[1]), 'y': _noise2(f2(y_input)[1])}
 
             try:
-                check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
+                tvm.testing.check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
             except AssertionError as e:
                 pass
             else:
-                raise AssertionError("check_numerical_grads didn't raise an exception")
+                raise AssertionError("tvm.testing.check_numerical_grads didn't raise an exception")
 
 
 if __name__ == "__main__":
     test_check_numerical_grads()
 
index 2e37de4..ec3c762 100644 (file)
@@ -17,6 +17,7 @@
 """Test gpu code verifier"""
 import tvm
 from tvm import te
+import tvm.testing
 
 def get_verify_pass(valid, **kwargs):
     def _fverify(f, *_):
@@ -25,6 +26,7 @@ def get_verify_pass(valid, **kwargs):
     return tvm.tir.transform.prim_func_pass(_fverify, opt_level=0)
 
 
+@tvm.testing.requires_gpu
 def test_shared_memory():
     def check_shared_memory(dtype):
         N = 1024
@@ -47,7 +49,7 @@ def test_shared_memory():
         # thread usage: M
 
         for target in ['opencl', 'cuda']:
-            if not tvm.context(target).exist:
+            if not tvm.testing.device_enabled(target):
                 continue
             valid = [None]
             with tvm.transform.PassContext(config={"tir.add_lower_pass": [
@@ -66,6 +68,7 @@ def test_shared_memory():
     check_shared_memory('float32')
     check_shared_memory('int8x4')
 
+@tvm.testing.requires_gpu
 def test_local_memory():
     N = 1024
     M = 128
@@ -83,7 +86,7 @@ def test_local_memory():
     # thread usage: M
 
     for target in ['opencl', 'cuda']:
-        if not tvm.context(target).exist:
+        if not tvm.testing.device_enabled(target):
             continue
 
         valid = [None]
@@ -101,6 +104,7 @@ def test_local_memory():
             tvm.build(s, [A, B], target)
         assert valid[0]
 
+@tvm.testing.requires_gpu
 def test_num_thread():
     N = 1024
     M = 128
@@ -118,7 +122,7 @@ def test_num_thread():
     # thread usage: N
 
     for target in ['opencl', 'cuda']:
-        if not tvm.context(target).exist:
+        if not tvm.testing.device_enabled(target):
             continue
 
         valid = [None]
@@ -152,6 +156,7 @@ def test_num_thread():
             tvm.build(s, [A, B], target)
         assert valid[0]
 
+@tvm.testing.requires_gpu
 def test_multiple_kernels():
     N = 1024
 
@@ -168,7 +173,7 @@ def test_multiple_kernels():
     # thread usage: N
 
     for target in ['opencl', 'cuda']:
-        if not tvm.context(target).exist:
+        if not tvm.testing.device_enabled(target):
             continue
 
         valid = [None]
@@ -186,6 +191,7 @@ def test_multiple_kernels():
             tvm.build(s, [A, C], target)
         assert valid[0]
 
+@tvm.testing.requires_gpu
 def test_wrong_bind():
     N = 1024
 
@@ -199,7 +205,7 @@ def test_wrong_bind():
     s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x"))
 
     for target in ['opencl', 'cuda']:
-        if not tvm.context(target).exist:
+        if not tvm.testing.device_enabled(target):
             continue
 
         valid = [None]
@@ -208,6 +214,7 @@ def test_wrong_bind():
             tvm.build(s, [A, B], target)
         assert not valid[0]
 
+@tvm.testing.requires_gpu
 def test_vectorize():
     N = 1024
 
@@ -224,7 +231,7 @@ def test_vectorize():
     s[B].vectorize(ji)
 
     for target in ['opencl', 'cuda']:
-        if not tvm.context(target).exist:
+        if not tvm.testing.device_enabled(target):
             continue
 
         valid = [None]
@@ -233,6 +240,7 @@ def test_vectorize():
             tvm.lower(s, [A, B])
         assert not valid[0]
 
+@tvm.testing.requires_gpu
 def test_vthread():
     N = 1024
 
@@ -245,7 +253,7 @@ def test_vthread():
     s[B].bind(s[B].op.axis[1], te.thread_axis("vthread"))
 
     for target in ['opencl', 'cuda']:
-        if not tvm.context(target).exist:
+        if not tvm.testing.device_enabled(target):
             continue
 
         valid = [None]
index 386fceb..7022e28 100644 (file)
@@ -17,6 +17,7 @@
 import tvm
 import pytest
 from tvm import te
+import tvm.testing
 
 # The following DLDeviceType/TVMDeviceExtType values
 # are originally defined in dlpack.h and c_runtime_api.h.
@@ -27,6 +28,7 @@ other_devices = ["llvm", "ext_dev"]
 # All computations are bound.
 # So VerifyMemory pass is expected to succeed.
 #
+@tvm.testing.uses_gpu
 def test_verify_memory_all_bind():
   n = te.var("n")
   A = te.placeholder((n,), name='A')
@@ -41,15 +43,17 @@ def test_verify_memory_all_bind():
   mod = tvm.lower(s, [A, B])
 
   for dev_type in gpu_devices + other_devices:
-      binded_mod = tvm.tir.transform.Apply(
-          lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
-      tvm.tir.transform.VerifyMemory()(binded_mod)
+      if tvm.testing.device_enabled(dev_type):
+          binded_mod = tvm.tir.transform.Apply(
+              lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+          tvm.tir.transform.VerifyMemory()(binded_mod)
 
 
 
 # Computations are not bound.
 # So VerifyMemory pass fails when device type is GPU.
 #
+@tvm.testing.uses_gpu
 def test_verify_memory_not_bind():
   n = te.var("n")
   A = te.placeholder((n,), name='A')
@@ -61,20 +65,23 @@ def test_verify_memory_not_bind():
   mod = tvm.lower(s, [A, B])
 
   for dev_type in gpu_devices:
-      binded_mod = tvm.tir.transform.Apply(
-          lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
-      with pytest.raises(RuntimeError):
-          tvm.tir.transform.VerifyMemory()(binded_mod)
+      if tvm.testing.device_enabled(dev_type):
+          binded_mod = tvm.tir.transform.Apply(
+              lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+          with pytest.raises(RuntimeError):
+              tvm.tir.transform.VerifyMemory()(binded_mod)
 
   for dev_type in other_devices:
-      binded_mod = tvm.tir.transform.Apply(
-          lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
-      tvm.tir.transform.VerifyMemory()(binded_mod)
+      if tvm.testing.device_enabled(dev_type):
+          binded_mod = tvm.tir.transform.Apply(
+              lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+          tvm.tir.transform.VerifyMemory()(binded_mod)
 
 
 # Computations are partially bound.
 # So VerifyMemory pass fails when device type is GPU.
 #
+@tvm.testing.uses_gpu
 def test_verify_memory_partially_bind():
   n = te.var("n")
   A = te.placeholder((n,), name='A')
@@ -91,15 +98,17 @@ def test_verify_memory_partially_bind():
   mod = tvm.lower(s, [A, B, C, D])
 
   for dev_type in gpu_devices:
-      binded_mod = tvm.tir.transform.Apply(
-          lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
-      with pytest.raises(RuntimeError):
-          tvm.tir.transform.VerifyMemory()(binded_mod)
+      if tvm.testing.device_enabled(dev_type):
+          binded_mod = tvm.tir.transform.Apply(
+              lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+          with pytest.raises(RuntimeError):
+              tvm.tir.transform.VerifyMemory()(binded_mod)
 
   for dev_type in other_devices:
-      binded_mod = tvm.tir.transform.Apply(
-          lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
-      tvm.tir.transform.VerifyMemory()(binded_mod)
+      if tvm.testing.device_enabled(dev_type):
+          binded_mod = tvm.tir.transform.Apply(
+              lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+          tvm.tir.transform.VerifyMemory()(binded_mod)
 
 
 
index 7ee1e53..f7e8f2f 100644 (file)
@@ -119,6 +119,7 @@ def test_buffer_index_merge_mult_mod():
     assert_simplified_equal(index_simplified, index_direct)
 
 
+@tvm.testing.requires_llvm
 def test_buffer_broadcast():
     m0, m1, m2 = te.size_var("m0"), te.size_var("m1"), te.size_var("m2")
     n0, n1, n2 = te.size_var("n0"), te.size_var("n1"), te.size_var("n2")
@@ -134,8 +135,6 @@ def test_buffer_broadcast():
     s = te.create_schedule(C.op)
 
     def check():
-        if not tvm.runtime.enabled("llvm"):
-            return
         fadd = tvm.build(s, [A, B, C], target='llvm', name='bcast_add', binds={A:Ab, B:Bb})
         ctx = tvm.cpu(0)
         a = tvm.nd.array(np.random.uniform(size=(2, 4, 3)).astype(A.dtype), ctx)
@@ -147,6 +146,7 @@ def test_buffer_broadcast():
     check()
 
 
+@tvm.testing.requires_llvm
 def test_buffer_broadcast_expr():
     n0, m0, x = te.size_var('n0'), te.size_var('m0'), te.size_var('x')
     n1, m1 = te.size_var('n1'), te.size_var('m1')
@@ -162,8 +162,6 @@ def test_buffer_broadcast_expr():
     s = te.create_schedule(C.op)
 
     def check_stride():
-        if not tvm.runtime.enabled("llvm"):
-            return
         fadd = tvm.build(s, [A, B, C, o1, x], target='llvm', name='bcast_add',
                          binds={A:Ab, B:Bb, C:Cc})
         ctx = tvm.cpu(0)
@@ -174,8 +172,6 @@ def test_buffer_broadcast_expr():
         tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 
     def check_no_stride():
-        if not tvm.runtime.enabled("llvm"):
-            return
         fadd = tvm.build(s, [A, B, C, o1, x], target='llvm', name='bcast_add',
                          binds={A: Ab, B: Bb, C: Cc})
         ctx = tvm.cpu(0)
@@ -186,8 +182,6 @@ def test_buffer_broadcast_expr():
         tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 
     def check_auto_bind():
-        if not tvm.runtime.enabled("llvm"):
-            return
         # Let build bind buffers
         fadd = tvm.build(s, [A, B, C, o1, x], target='llvm', name='bcast_add')
         ctx = tvm.cpu(0)
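
CPU-only tests follow the same scheme: the early `return` on a missing llvm runtime becomes a `@tvm.testing.requires_llvm` skip. A minimal, hypothetical sketch of that shape:

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import te

    @tvm.testing.requires_llvm
    def test_llvm_only_sketch():
        n = 8
        A = te.placeholder((n,), name="A")
        B = te.compute((n,), lambda i: A[i] * 2.0, name="B")
        s = te.create_schedule(B.op)
        # No manual `tvm.runtime.enabled("llvm")` guard is needed here;
        # the decorator skips the test when llvm is unavailable.
        f = tvm.build(s, [A, B], target="llvm", name="double")
        ctx = tvm.cpu(0)
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy() * 2.0)
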
index 95047f5..7664806 100644 (file)
@@ -17,6 +17,7 @@
 import tvm
 from tvm import te
 import numpy as np
+import tvm.testing
 
 def test_for():
     ib = tvm.tir.ir_builder.create()
@@ -90,7 +91,7 @@ def test_cpu():
                    name="vector_add", dtype=dtype)
     s = te.create_schedule(C.op)
     def check_target(target):
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             return
         # build and invoke the kernel.
         fadd = tvm.build(s, [A, B, C], target)
@@ -103,6 +104,7 @@ def test_cpu():
         tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
     check_target("llvm")
 
+@tvm.testing.requires_gpu
 def test_gpu():
     n = te.size_var('n')
     dtype = "float32"
@@ -133,7 +135,7 @@ def test_gpu():
     stmt = tvm.te.schedule.ScheduleOps(s, bounds)
     def check_target(target):
         n = 1024
-        if not tvm.runtime.enabled(target):
+        if not tvm.testing.device_enabled(target):
             return
         # build and invoke the kernel.
         fadd = tvm.build(s, [A, B, C], target)
index 186a52d..7c93b4e 100644 (file)
@@ -19,7 +19,7 @@ from tvm import te
 from tvm import relay
 import numpy as np
 import pytest
-from tvm.relay.testing import ctx_list
+from tvm.testing import enabled_targets
 
 var_list = []
 
@@ -711,7 +711,7 @@ def test_hoisting_op_conv():
     kernel = np.random.uniform(-scale, scale, size=kshape).astype(dtype)
 
     params = {'w': tvm.nd.array(kernel)}
-    for target, ctx in ctx_list():
+    for target, ctx in enabled_targets():
         with tvm.transform.PassContext(opt_level=3):
             graph, lib, params = relay.build_module.build(mod, target=target, params=params)
             m = tvm.contrib.graph_runtime.create(graph, lib, ctx)
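
On the Relay side, loops over `ctx_list()` become loops over `tvm.testing.enabled_targets()`, which yields only the `(target, ctx)` pairs enabled for the current test session. A minimal sketch with a hypothetical Relay function:

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    def test_relay_add_sketch():
        x = relay.var("x", shape=(4,), dtype="float32")
        func = relay.Function([x], x + x)
        data = np.arange(4, dtype="float32")
        for target, ctx in tvm.testing.enabled_targets():
            intrp = relay.create_executor("graph", ctx=ctx, target=target)
            result = intrp.evaluate(func)(data)
            tvm.testing.assert_allclose(result.asnumpy(), data + data)
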
index fa27fdd..bb35f32 100644 (file)
@@ -25,6 +25,7 @@ def collect_visit(stmt, f):
     return ret
 
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_llvm(index_a, index_b):
     n = te.size_var("n")
@@ -43,6 +44,7 @@ def test_out_of_bounds_llvm(index_a, index_b):
     c = tvm.nd.array(np.zeros(1024, dtype=C.dtype), ctx)
     fadd (a, b, c)
 
+@tvm.testing.requires_llvm
 def test_in_bounds_llvm():
     n = te.size_var("n")
     A = te.placeholder ((n,), name='A')
@@ -59,6 +61,7 @@ def test_in_bounds_llvm():
     c = tvm.nd.array(np.zeros(1024, dtype=C.dtype), ctx)
     fadd (a, b, c)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b):
     n = tvm.runtime.convert(nn)
@@ -80,6 +83,7 @@ def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b):
     c = tvm.nd.array(np.zeros(n, dtype=c.dtype), ctx)
     f(a, b, c)
 
+@tvm.testing.requires_llvm
 def test_in_bounds_vectorize_llvm():
     n = 512
     lanes = 2
@@ -105,6 +109,7 @@ def test_in_bounds_vectorize_llvm():
     f(a, c)
     tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
 
+@tvm.testing.requires_llvm
 def test_in_bounds_loop_partition_basic_llvm():
     n = te.size_var('n')
     A = te.placeholder((n, ), name='A')
@@ -122,6 +127,7 @@ def test_in_bounds_loop_partition_basic_llvm():
     t = tvm.nd.empty((32,), T.dtype, ctx)
     f(a, b, t)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_loop_partition_basic_llvm(index_a, index_b):
     n = te.size_var('n')
@@ -186,6 +192,7 @@ def test_in_bounds_const_loop_partition_ir():
     assert(len(branch_collector) ==  2)
 
 
+@tvm.testing.requires_llvm
 def test_in_bounds_const_loop_partition_llvm():
     with tvm.transform.PassContext(config={
         "tir.instrument_bound_checkers": True,
@@ -207,6 +214,7 @@ def test_in_bounds_const_loop_partition_llvm():
         t = tvm.nd.empty((n,), T.dtype, ctx)
         f(a, b, t)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_const_loop_partition_llvm(index_a, index_b):
     with tvm.transform.PassContext(config={
@@ -229,6 +237,7 @@ def test_out_of_bounds_const_loop_partition_llvm(index_a, index_b):
         t = tvm.nd.empty((n,), T.dtype, ctx)
         f(a, b, t)
 
+@tvm.testing.requires_llvm
 def test_in_bounds_conv_llvm(loop_tiling=False):
     HSTR = WSTR = 1
     in_channel = 128
@@ -264,6 +273,7 @@ def test_in_bounds_conv_llvm(loop_tiling=False):
     conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx)
     f(data_input, kernel_input, conv_out)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False):
     HSTR = WSTR = 1
@@ -307,6 +317,7 @@ def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False
     conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx)
     f(data_input, kernel_input, conv_out)
 
+@tvm.testing.requires_llvm
 def test_in_bounds_tensors_with_same_shapes1D_llvm():
     n = te.size_var('n')
     k = te.size_var('k')
@@ -325,6 +336,7 @@ def test_in_bounds_tensors_with_same_shapes1D_llvm():
     t = tvm.nd.empty((32,), T.dtype, ctx)
     f(a, b, t)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape):
     n = te.size_var('n')
@@ -344,6 +356,7 @@ def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape
     t = tvm.nd.empty((c_shape,), T.dtype, ctx)
     f(a, b, t)
 
+@tvm.testing.requires_llvm
 def test_in_bounds_tensors_with_same_shapes2D_llvm():
     n = te.size_var('n')
     k = te.size_var('k')
@@ -362,6 +375,7 @@ def test_in_bounds_tensors_with_same_shapes2D_llvm():
     t = tvm.nd.empty((32, 32), T.dtype, ctx)
     f(a, b, t)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape):
     n = te.size_var('n')
@@ -381,6 +395,7 @@ def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape
     t = tvm.nd.empty((c_shape[0],c_shape[1]), T.dtype, ctx)
     f(a, b, t)
 
+@tvm.testing.requires_llvm
 def test_in_bounds_tensors_with_same_shapes3D_llvm():
     n = te.size_var('n')
     k = te.size_var('k')
@@ -400,6 +415,7 @@ def test_in_bounds_tensors_with_same_shapes3D_llvm():
     t = tvm.nd.empty((32, 32, 32), T.dtype, ctx)
     f(a, b, t)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_tensors_with_diff_shapes3D_llvm(a_shape, b_shape, c_shape):
     n = te.size_var('n')
@@ -420,10 +436,9 @@ def test_out_of_bounds_tensors_with_diff_shapes3D_llvm(a_shape, b_shape, c_shape
     t = tvm.nd.empty((c_shape[0],c_shape[1],c_shape[2]), T.dtype, ctx)
     f(a, b, t)
 
+@tvm.testing.requires_llvm
 @pytest.mark.xfail
 def test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm():
-    if not tvm.runtime.enabled("llvm"):
-        return
     n = 64
     A = te.placeholder((n, ), name='A')
     scale = te.placeholder((), name='scale')
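
The out-of-bounds tests above combine `@tvm.testing.requires_llvm` with `@pytest.mark.xfail`, since the instrumented kernel is expected to fail at run time. A hypothetical sketch of that combination, reusing the `tir.instrument_bound_checkers` option shown in the hunks above:

    import numpy as np
    import pytest
    import tvm
    import tvm.testing
    from tvm import te

    @tvm.testing.requires_llvm
    @pytest.mark.xfail
    def test_out_of_bounds_sketch():
        n = te.size_var("n")
        A = te.placeholder((n,), name="A")
        B = te.compute((n,), lambda i: A[i + 1], name="B")  # deliberate off-by-one read
        s = te.create_schedule(B.op)
        with tvm.transform.PassContext(config={"tir.instrument_bound_checkers": True}):
            f = tvm.build(s, [A, B], "llvm")
        ctx = tvm.cpu(0)
        a = tvm.nd.array(np.random.uniform(size=32).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(32, dtype=B.dtype), ctx)
        # The instrumented kernel is expected to abort on the out-of-range access.
        f(a, b)
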
index 98c79e4..fbd4ce6 100644 (file)
@@ -15,6 +15,7 @@
  # specific language governing permissions and limitations
  # under the License.
 import tvm
+import tvm.testing
 from tvm import te
 import numpy as np
 
@@ -47,9 +48,6 @@ def check_value(expr, vx, vy, data, fref):
     C = te.compute((n,), make_binds)
     s = te.create_schedule([C.op])
 
-    if not tvm.runtime.enabled("llvm"):
-        return
-
     f = tvm.build(s, [A, B, C], "llvm")
     a = tvm.nd.array(np.array([x for x, y in data], dtype=expr.dtype))
     b = tvm.nd.array(np.array([y for x, y in data], dtype=expr.dtype))
@@ -69,6 +67,7 @@ def get_ref_data():
     return list(itertools.product(x, y))
 
 
+@tvm.testing.requires_llvm
 def test_lower_floordiv():
     data = get_ref_data()
     for dtype in ["int32", "int64", "int16"]:
@@ -92,6 +91,7 @@ def test_lower_floordiv():
         check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a // b)
 
 
+@tvm.testing.requires_llvm
 def test_lower_floormod():
     data = get_ref_data()
     for dtype in ["int32", "int64", "int16"]:
index 5801200..eecc7f1 100644 (file)
@@ -19,7 +19,9 @@ from tvm import te
 from tvm.contrib.nvcc import have_fp16
 
 import numpy as np
+import tvm.testing
 
+@tvm.testing.requires_cuda
 def test_lower_warp_memory_local_scope():
     m = 128
     A = te.placeholder((m,), name='A')
@@ -47,6 +49,7 @@ def test_lower_warp_memory_local_scope():
     assert(fdevice.body.body.value.value == "local")
     assert(fdevice.body.body.body.extents[0].value == 2)
 
+@tvm.testing.requires_cuda
 def test_lower_warp_memory_correct_indices():
     n = 32
     A = te.placeholder((2, n, n), name='A', dtype="float32")
@@ -83,11 +86,10 @@ def test_lower_warp_memory_correct_indices():
     assert "threadIdx.x" in idx_names
     assert "threadIdx.y" not in idx_names
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_lower_warp_memory_cuda_end_to_end():
     def check_cuda(dtype):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because gpu does not have fp16 support")
             return
@@ -127,11 +129,10 @@ def test_lower_warp_memory_cuda_end_to_end():
     check_cuda("float32")
     check_cuda("float16")
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_lower_warp_memory_cuda_half_a_warp():
     def check_cuda(dtype):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because gpu does not have fp16 support")
             return
@@ -170,11 +171,10 @@ def test_lower_warp_memory_cuda_half_a_warp():
     check_cuda("float32")
     check_cuda("float16")
 
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
 def test_lower_warp_memory_cuda_2_buffers():
     def check_cuda(dtype):
-        if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
-            print("skip because cuda is not enabled..")
-            return
         if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
             print("Skip because gpu does not have fp16 support")
             return
@@ -218,6 +218,7 @@ def test_lower_warp_memory_cuda_2_buffers():
     check_cuda("float32")
     check_cuda("float16")
 
+@tvm.testing.requires_gpu
 def test_lower_warp_memory_roundup():
     def check(device, m):
         A = te.placeholder((m,), name='A')
@@ -246,7 +247,7 @@ def test_lower_warp_memory_roundup():
             tvm.testing.assert_allclose(B_nd.asnumpy(), B_np)
 
     for device in ['cuda', 'rocm']:
-        if not tvm.context(device, 0).exist or not tvm.runtime.enabled(device):
+        if not tvm.testing.device_enabled(device):
             print("skip because", device,"is not enabled..")
             continue
         check(device, m=31)
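
Device requirements stack, as the warp-memory tests above show: `@tvm.testing.requires_gpu` together with `@tvm.testing.requires_cuda`, with finer-grained capability checks such as fp16 support still done inside the body. A hypothetical sketch:

    import tvm
    import tvm.testing
    from tvm import te
    from tvm.contrib.nvcc import have_fp16

    @tvm.testing.requires_gpu
    @tvm.testing.requires_cuda
    def test_cuda_fp16_sketch():
        if not have_fp16(tvm.gpu(0).compute_version):
            print("Skip because gpu does not have fp16 support")
            return
        n = 32
        A = te.placeholder((n,), name="A", dtype="float16")
        B = te.compute((n,), lambda i: A[i] + A[i], name="B")
        s = te.create_schedule(B.op)
        s[B].bind(B.op.axis[0], te.thread_axis("threadIdx.x"))
        tvm.build(s, [A, B], "cuda")
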
index 3ff6804..75b3193 100644 (file)
@@ -16,7 +16,9 @@
 # under the License.
 import tvm
 from tvm import te
+import tvm.testing
 
+@tvm.testing.requires_cuda
 def test_thread_storage_sync():
     m = te.size_var('m')
     l = te.size_var('l')
index 61c079a..475ce1c 100755 (executable)
@@ -20,9 +20,9 @@
 set +u
 
 if [[ ! -z $CI_PYTEST_ADD_OPTIONS ]]; then
-    export PYTEST_ADDOPTS="-v $CI_PYTEST_ADD_OPTIONS"
+    export PYTEST_ADDOPTS="-v $CI_PYTEST_ADD_OPTIONS $PYTEST_ADDOPTS"
 else
-    export PYTEST_ADDOPTS="-v "
+    export PYTEST_ADDOPTS="-v $PYTEST_ADDOPTS"
 fi
 set -u
 
index e5f9b20..3c5839b 100755 (executable)
@@ -24,6 +24,8 @@ source tests/scripts/setup-pytest-env.sh
 export TVM_BIND_THREADS=0
 export OMP_NUM_THREADS=1
 
+export TVM_TEST_TARGETS="llvm;cuda"
+
 find . -type f -path "*.pyc" | xargs rm -f
 
 # Rebuild cython
index 10354e5..6dfcabc 100755 (executable)
@@ -25,6 +25,8 @@ source tests/scripts/setup-pytest-env.sh
 export TVM_BIND_THREADS=0
 export OMP_NUM_THREADS=1
 
+export TVM_TEST_TARGETS="llvm"
+
 find . -type f -path "*.pyc" | xargs rm -f
 
 # Rebuild cython
index d61895c..741f15b 100755 (executable)
@@ -63,7 +63,7 @@ TVM_FFI=ctypes python3 -m pytest apps/dso_plugin_module
 TVM_FFI=ctypes python3 -m pytest tests/python/integration
 TVM_FFI=ctypes python3 -m pytest tests/python/contrib
 
-TVM_FFI=ctypes python3 -m pytest tests/python/relay
+TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" TVM_FFI=ctypes python3 -m pytest tests/python/relay
 
 # Do not enable OpenGL
 # TVM_FFI=cython python -m pytest tests/webgl
index 6b2755a..c2a9e0c 100755 (executable)
@@ -16,4 +16,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;vulkan;nvptx;opencl -device=mali,aocl_sw_emu"
+export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS"
+export TVM_RELAY_TEST_TARGETS="cuda"
+
 ./tests/scripts/task_python_integration.sh
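
The exports above narrow a session to GPU targets and to tests marked `gpu`. Assuming `tvm.testing` derives `device_enabled`/`enabled_targets` from `TVM_TEST_TARGETS` (the intent of this patch, though the wiring lives outside this hunk), the effect can be sanity-checked from Python:

    import tvm.testing

    def test_session_is_gpu_only_sketch():
        # Under the GPU-only TVM_TEST_TARGETS above, CPU targets should not be
        # reported as enabled, so tests guarded by device_enabled("llvm") skip.
        assert not tvm.testing.device_enabled("llvm")
        for target, _ in tvm.testing.enabled_targets():
            assert target != "llvm"
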
index 637be67..56722b1 100755 (executable)
@@ -16,4 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;vulkan;nvptx;opencl -device=mali,aocl_sw_emu"
+export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS"
+
 ./tests/scripts/task_python_unittest.sh
index 46162e1..3643c8d 100644 (file)
@@ -27,7 +27,6 @@ import tvm
 from tvm import te
 
 from matplotlib import pyplot as plt
-from tvm.relay.testing.config import ctx_list
 from tvm import relay
 from tvm.contrib import graph_runtime
 from tvm.contrib.download import download_testdata
@@ -70,7 +69,6 @@ supported_model = [
 
 model_name = supported_model[0]
 dshape = (1, 3, 512, 512)
-target_list = ctx_list()
 
 ######################################################################
 # Download and pre-process demo image
@@ -105,9 +103,11 @@ def run(lib, ctx):
     class_IDs, scores, bounding_boxs = m.get_output(0), m.get_output(1), m.get_output(2)
     return class_IDs, scores, bounding_boxs
 
-for target, ctx in target_list:
-    lib = build(target)
-    class_IDs, scores, bounding_boxs = run(lib, ctx)
+for target in ["llvm", "cuda"]:
+    ctx = tvm.context(target, 0)
+    if ctx.exist:
+        lib = build(target)
+        class_IDs, scores, bounding_boxs = run(lib, ctx)
 
 ######################################################################
 # Display result