import tvm_ext
import tvm
import tvm._ffi.registry
+import tvm.testing
from tvm import te
import numpy as np
B = te.compute((n,), lambda *i: A(*i) + 1.0, name='B')
s = te.create_schedule(B.op)
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
f = tvm.build(s, [A, B], "ext_dev", "llvm")
ctx = tvm.ext_dev(0)
s = te.create_schedule(B.op)
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
f = tvm.build(s, [A, B], "llvm")
ctx = tvm.cpu(0)
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import tvm.testing
+from pytest import ExitCode
+
+def pytest_configure(config):
+ print("enabled targets:", "; ".join(map(lambda x: x[0], tvm.testing.enabled_targets())))
+ print("pytest marker:", config.option.markexpr)
+
+def pytest_sessionfinish(session, exitstatus):
+ # Don't exit with an error if we select a subset of tests that doesn't
+ # include anything
+ if session.config.option.markexpr != '':
+ if exitstatus == ExitCode.NO_TESTS_COLLECTED:
+ session.exitstatus = ExitCode.OK
- Stick to language features as in ``python 3.5``
+Writing Python Tests
+--------------------
+We use `pytest <https://docs.pytest.org/en/stable/>`_ for all python testing. ``tests/python`` contains all the tests.
+
+If you want your test to run over a variety of targets, use the :py:func:`tvm.testing.parametrize_targets` decorator. For example:
+
+.. code:: python
+
+ @tvm.testing.parametrize_targets
+ def test_mytest(target, ctx):
+ ...
+
+will run ``test_mytest`` with ``target="llvm"``, ``target="cuda"``, and a few others. This also ensures that your test is run on the correct hardware by the CI. If you only want to test against a couple of targets, use ``@tvm.testing.parametrize_targets("target_1", "target_2")``. If you want to test on a single target, use the associated decorator from :py:mod:`tvm.testing`. For example, CUDA tests use the ``@tvm.testing.requires_cuda`` decorator.
+
Handle Integer Constant Expression
----------------------------------
We often need to handle constant integer expressions in TVM. Before we do so, the first question we want to ask is that is it really necessary to get a constant integer. If symbolic expression also works and let the logic flow, we should use symbolic expression as much as possible. So the generated code works for shapes that are not known ahead of time.
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-"""Configuration about tests"""
-from __future__ import absolute_import as _abs
-
-import os
-import tvm
-
-
-def ctx_list():
- """Get context list for testcases"""
- device_list = os.environ.get("RELAY_TEST_TARGETS", "")
- device_list = (device_list.split(",") if device_list
- else ["llvm", "cuda"])
- device_list = set(device_list)
- res = [(device, tvm.context(device, 0)) for device in device_list]
- return [x for x in res if x[1].exist]
+[pytest]
+markers =
+ gpu: mark a test as requiring a gpu
+ tensorcore: mark a test as requiring a tensorcore
+ cuda: mark a test as requiring cuda
+ opencl: mark a test as requiring opencl
+ rocm: mark a test as requiring rocm
+ vulkan: mark a test as requiring vulkan
+ metal: mark a test as requiring metal
+ llvm: mark a test as requiring llvm
import tvm.relay as relay
import tvm.relay.op as op
from tvm.relay import Prelude
+from tvm.testing import enabled_targets
from . import mlp
from . import resnet
from . import temp_op_attr
from . import synthetic
-from .config import ctx_list
from .init import create_workload
from .nat import add_nat_definitions, count, make_nat_value, make_nat_expr
from .py_converter import to_python, run_as_python
if test_inputs is None:
test_inputs = inputs
- for target, ctx in ctx_list():
+ for target, ctx in enabled_targets():
intrp = relay.create_executor(ctx=ctx, target=target)
# Get analytic gradients.
# under the License.
# pylint: disable=invalid-name,unnecessary-comprehension
-""" TVM testing utilities """
+""" TVM testing utilities
+
+Testing Markers
+***************
+
+We use pytest markers to specify the requirements of test functions. Currently
+there is a single distinction that matters for our testing environment: does
+the test require a gpu. For tests that require just a gpu or just a cpu, we
+have the decorator :py:func:`requires_gpu` that enables the test when a gpu is
+available. To avoid running tests that don't require a gpu on gpu nodes, this
+decorator also sets the pytest marker `gpu` so we can select the gpu subset
+of tests (using `pytest -m gpu`).
+
+Unfortunately, many tests are written like this:
+
+.. code:: python
+
+ def test_something():
+ for target in all_targets():
+ do_something()
+
+The test uses both gpu and cpu targets, so the test needs to be run on both cpu
+and gpu nodes. But we still want to only run the cpu targets on the cpu testing
+node. The solution is to mark these tests with the gpu marker so they will be
+run on the gpu nodes. But we also modify all_targets (renamed to
+enabled_targets) so that it only returns gpu targets on gpu nodes and cpu
+targets on cpu nodes (using an environment variable).
+
+Instead of using the all_targets function, future tests that would like to
+test against a variety of targets should use the
+:py:func:`tvm.testing.parametrize_targets` functionality. This allows us
+greater control over which targets are run on which testing nodes.
+
+If in the future we want to add a new type of testing node (for example
+fpgas), we need to add a new marker in `tests/python/pytest.ini` and a new
+function in this module. Then targets using this node should be added to the
+`TVM_TEST_TARGETS` environment variable in the CI.
+"""
import logging
+import os
+import pytest
import numpy as np
import tvm
import tvm.arith
import tvm.tir
import tvm.te
import tvm._ffi
+from tvm.contrib import nvcc
def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7):
constraints_trans.dst_to_src, constraints_trans.src_to_dst)
+def _get_targets():
+ target_str = os.environ.get("TVM_TEST_TARGETS", "")
+ if len(target_str) == 0:
+ target_str = DEFAULT_TEST_TARGETS
+ targets = {
+ dev
+ for dev in target_str.split(";")
+ if len(dev) > 0 and tvm.context(dev, 0).exist and tvm.runtime.enabled(dev)
+ }
+ if len(targets) == 0:
+ logging.warning(
+ "None of the following targets are supported by this build of TVM: %s."
+ " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.",
+ target_str,
+ )
+ return {"llvm"}
+ return targets
+
+
+# Semicolon-separated target strings used by _get_targets when the
+# TVM_TEST_TARGETS environment variable is unset or empty.
+# NOTE(review): the last entry joins "mali" and "aocl_sw_emu" with a comma
+# rather than ";", so they form a single target string -- confirm this is intended.
+DEFAULT_TEST_TARGETS = (
+ "llvm;cuda;opencl;metal;rocm;vulkan;nvptx;"
+ "llvm -device=arm_cpu;opencl -device=mali,aocl_sw_emu"
+)
+
+
+def device_enabled(target):
+ """Check if a target should be used when testing.
+
+ It is recommended that you use :py:func:`tvm.testing.parametrize_targets`
+ instead of manually checking if a target is enabled.
+
+ This allows the user to control which devices they are testing against. In
+ tests, this should be used to check if a device should be used when said
+ device is an optional part of the test.
+
+ Parameters
+ ----------
+ target : str
+ Target string to check against
+
+ Returns
+ -------
+ bool
+ Whether or not the device associated with this target is enabled.
+
+ Example
+ -------
+ >>> @tvm.testing.uses_gpu
+ >>> def test_mytest():
+ >>> for target in ["cuda", "llvm"]:
+ >>> if device_enabled(target):
+ >>> test_body...
+
+ Here, `test_body` will only be reached by with `target="cuda"` on gpu test
+ nodes and `target="llvm"` on cpu test nodes.
+ """
+ assert isinstance(target, str), "device_enabled requires a target as a string"
+ target_kind = target.split(" ")[
+ 0
+ ] # only check if device name is found, sometime there are extra flags
+ return any([target_kind in test_target for test_target in _get_targets()])
+
+
+def enabled_targets():
+ """Get all enabled targets with associated contexts.
+
+ In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of
+ this function.
+
+ In this context, enabled means that TVM was built with support for this
+ target and the target name appears in the TVM_TEST_TARGETS environment
+ variable. If TVM_TEST_TARGETS is not set, it defaults to variable
+ DEFAULT_TEST_TARGETS in this module.
+
+ If you use this function in a test, you **must** decorate the test with
+ :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu).
+
+ Returns
+ -------
+ targets: list
+ A list of pairs of all enabled devices and the associated context
+ """
+ return [(tgt, tvm.context(tgt)) for tgt in _get_targets()]
+
+
+def _compose(args, decs):
+ """Helper to apply multiple markers
+ """
+ if len(args) > 0:
+ f = args[0]
+ for d in reversed(decs):
+ f = d(f)
+ return f
+ return decs
+
+
+def uses_gpu(*args):
+ """Mark to differentiate tests that use the GPU is some capacity.
+
+ These tests will be run on CPU-only test nodes and on test nodes with GPUS.
+ To mark a test that must have a GPU present to run, use
+ :py:func:`tvm.testing.requires_gpu`.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _uses_gpu = [pytest.mark.gpu]
+ return _compose(args, _uses_gpu)
+
+
+def requires_gpu(*args):
+ """Mark a test as requiring a GPU to run.
+
+ Tests with this mark will not be run unless a gpu is present.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_gpu = [
+ pytest.mark.skipif(not tvm.gpu().exist, reason="No GPU present"),
+ *uses_gpu(),
+ ]
+ return _compose(args, _requires_gpu)
+
+
+
+
+def requires_cuda(*args):
+ """Mark a test as requiring the CUDA runtime.
+
+ This also marks the test as requiring a gpu.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_cuda = [
+ pytest.mark.cuda,
+ pytest.mark.skipif(
+ not device_enabled("cuda"), reason="CUDA support not enabled"
+ ),
+ *requires_gpu(),
+ ]
+ return _compose(args, _requires_cuda)
+
+
+
+
+def requires_opencl(*args):
+ """Mark a test as requiring the OpenCL runtime.
+
+ This also marks the test as requiring a gpu.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_opencl = [
+ pytest.mark.opencl,
+ pytest.mark.skipif(
+ not device_enabled("opencl"), reason="OpenCL support not enabled"
+ ),
+ *requires_gpu(),
+ ]
+ return _compose(args, _requires_opencl)
+
+
+
+
+def requires_rocm(*args):
+ """Mark a test as requiring the rocm runtime.
+
+ This also marks the test as requiring a gpu.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_rocm = [
+ pytest.mark.rocm,
+ pytest.mark.skipif(
+ not device_enabled("rocm"), reason="rocm support not enabled"
+ ),
+ *requires_gpu(),
+ ]
+ return _compose(args, _requires_rocm)
+
+
+
+
+def requires_metal(*args):
+ """Mark a test as requiring the metal runtime.
+
+ This also marks the test as requiring a gpu.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_metal = [
+ pytest.mark.metal,
+ pytest.mark.skipif(
+ not device_enabled("metal"), reason="metal support not enabled"
+ ),
+ *requires_gpu(),
+ ]
+ return _compose(args, _requires_metal)
+
+
+
+
+def requires_vulkan(*args):
+ """Mark a test as requiring the vulkan runtime.
+
+ This also marks the test as requiring a gpu.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_vulkan = [
+ pytest.mark.vulkan,
+ pytest.mark.skipif(
+ not device_enabled("vulkan"), reason="vulkan support not enabled"
+ ),
+ *requires_gpu(),
+ ]
+ return _compose(args, _requires_vulkan)
+
+
+
+
+def requires_tensorcore(*args):
+ """Mark a test as requiring a tensorcore to run.
+
+ Tests with this mark will not be run unless a tensorcore is present.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_tensorcore = [
+ pytest.mark.tensorcore,
+ pytest.mark.skipif(
+ not tvm.gpu().exist or not nvcc.have_tensorcore(tvm.gpu(0).compute_version),
+ reason="No tensorcore present",
+ ),
+ *requires_gpu(),
+ ]
+ return _compose(args, _requires_tensorcore)
+
+
+
+
+def requires_llvm(*args):
+ """Mark a test as requiring llvm to run.
+
+ Parameters
+ ----------
+ f : function
+ Function to mark
+ """
+ _requires_llvm = [
+ pytest.mark.llvm,
+ pytest.mark.skipif(
+ not device_enabled("llvm"), reason="LLVM support not enabled"
+ ),
+ ]
+ return _compose(args, _requires_llvm)
+
+
+def _target_to_requirement(target):
+ # mapping from target to decorator
+ if target.startswith("cuda"):
+ return requires_cuda()
+ if target.startswith("rocm"):
+ return requires_rocm()
+ if target.startswith("vulkan"):
+ return requires_vulkan()
+ if target.startswith("nvptx"):
+ return [*requires_llvm(), *requires_gpu()]
+ if target.startswith("metal"):
+ return requires_metal()
+ if target.startswith("opencl"):
+ return requires_opencl()
+ if target.startswith("llvm"):
+ return requires_llvm()
+ return []
+
+
+def parametrize_targets(*args):
+ """Parametrize a test over all enabled targets.
+
+ Use this decorator when you want your test to be run over a variety of
+ targets and devices (including cpu and gpu devices).
+
+ Parameters
+ ----------
+ f : function
+ Function to parametrize. Must be of the form `def test_xxxxxxxxx(target, ctx)`:,
+ where `xxxxxxxxx` is any name.
+ targets : list[str], optional
+ Set of targets to run against. If not supplied,
+ :py:func:`tvm.testing.enabled_targets` will be used.
+
+ Example
+ -------
+ >>> @tvm.testing.parametrize
+ >>> def test_mytest(target, ctx):
+ >>> ... # do something
+
+ Or
+
+ >>> @tvm.testing.parametrize("llvm", "cuda")
+ >>> def test_mytest(target, ctx):
+ >>> ... # do something
+ """
+ def wrap(targets):
+ def func(f):
+ params = [
+ pytest.param(target, tvm.context(target, 0), marks=_target_to_requirement(target))
+ for target in targets
+ ]
+ return pytest.mark.parametrize("target,ctx", params)(f)
+ return func
+ if len(args) == 1 and callable(args[0]):
+ targets = [t for t, _ in enabled_targets()]
+ return wrap(targets)(args[0])
+ return wrap(args)
+
+
tvm._ffi._init_api("testing", __name__)
"docs/_static/css/tvm_theme.css",
"docs/_static/img/tvm-logo-small.png",
"docs/_static/img/tvm-logo-square.png",
+ # pytest config
+ "pytest.ini",
}
from tvm.contrib import cblas
from tvm.contrib import mkl
from tvm.contrib import mkldnn
+import tvm.testing
def verify_matmul_add(m, l, n, lib, transa=False, transb=False, dtype="float32"):
bias = te.var('bias', dtype=dtype)
return np.dot(a, b) + bb
def verify(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("skip because %s is not enabled..." % target)
return
if not tvm.get_global_func(lib.__name__ + ".matmul", True):
return np.dot(a, b) + bb
def verify(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("skip because %s is not enabled..." % target)
return
if not tvm.get_global_func("tvm.contrib.mkl.matmul_u8s8s32", True):
return tvm.topi.testing.batch_matmul(a, b)
def verify(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("skip because %s is not enabled..." % target)
return
if not tvm.get_global_func(lib.__name__ + ".matmul", True):
import numpy as np
from tvm.contrib import cublas
from tvm.contrib import cublaslt
+import tvm.testing
def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5):
n = 1024
s = te.create_schedule(C.op)
def verify(target="cuda"):
- if not tvm.runtime.enabled(target):
- print("skip because %s is not enabled..." % target)
- return
if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
print("skip because extern function is not available")
return
s = te.create_schedule(C.op)
def verify(target="cuda"):
- if not tvm.runtime.enabled(target):
- print("skip because %s is not enabled..." % target)
- return
if not tvm.get_global_func("tvm.contrib.cublaslt.matmul", True):
print("skip because extern function is not available")
return
s = te.create_schedule(C.op)
def verify(target="cuda"):
- if not tvm.runtime.enabled(target):
- print("skip because %s is not enabled..." % target)
- return
if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
print("skip because extern function is not available")
return
b.asnumpy().astype(C.dtype)).astype(C.dtype), rtol=rtol)
verify()
+@tvm.testing.requires_cuda
def test_matmul_add():
verify_matmul_add('float', 'float', rtol=1e-3)
verify_matmul_add('float16', 'float')
verify_matmul_add('float16', 'float16', rtol=1e-2)
verify_matmul_add('int8', 'int32')
+@tvm.testing.requires_cuda
def test_matmul_add_igemm():
verify_matmul_add_igemm('int8', 'int32')
+@tvm.testing.requires_cuda
def test_batch_matmul():
verify_batch_matmul('float', 'float')
verify_batch_matmul('float16', 'float')
from tvm.contrib.nvcc import have_fp16
import numpy as np
import tvm.topi.testing
+import tvm.testing
def verify_conv2d(data_dtype, conv_dtype, tensor_format=0, groups=1):
in_channel = 4
height = 32
width = 32
- if not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled...")
- return
if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True):
print("skip because cudnn is not enabled...")
return
f(x, w, y)
tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=1e-2, rtol=1e-2)
+@tvm.testing.requires_gpu
def test_conv2d():
verify_conv2d("float32", "float32", tensor_format=0)
verify_conv2d("float16", "float32", tensor_format=1)
height = 32
width = 32
- if not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled...")
- return
if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True):
print("skip because cudnn is not enabled...")
return
f(x, w, y)
tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=3e-5, rtol=1e-4)
+@tvm.testing.requires_gpu
def test_conv3d():
verify_conv3d("float32", "float32", tensor_format=0)
verify_conv3d("float32", "float32", tensor_format=0, groups=2)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3)
+@tvm.testing.requires_gpu
def test_softmax():
- if not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled...")
- return
if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True):
print("skip because cudnn is not enabled...")
return
# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines, len-as-condition
import tvm
+import tvm.testing
from tvm import te
import numpy as np
from tvm.topi.x86.tensor_intrin import dot_16x1x16_uint8_int8_int32_cascadelake
import pytest
+@tvm.testing.requires_llvm
@pytest.mark.skip("skip because feature not enabled")
def test_fc_int8_acc32():
m = 1024
# (ignoring processor)" error with the following setting. After LLVM 8.0 is enabled in the
# test, we should use cascadelake setting.
def verify(target="llvm -mcpu=cascadelake"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("skip because %s is not enabled..." % target)
return
import numpy as np
+@tvm.testing.requires_rocm
def test_conv2d():
in_channel = 3
out_channel = 64
dilation_w = 1
xshape = [1, in_channel, 128, 128]
- if not tvm.runtime.enabled("rocm"):
- print("skip because rocm is not enabled...")
- return
if not tvm.get_global_func("tvm.contrib.miopen.conv2d.setup", True):
print("skip because miopen is not enabled...")
return
import numpy as np
from tvm.contrib import mps
+@tvm.testing.requires_metal
def test_matmul():
- if not tvm.runtime.enabled("metal"):
- print("skip because %s is not enabled..." % "metal")
- return
n = 1024
l = 128
m = 256
c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
verify(A, B, D, s)
+@tvm.testing.requires_metal
def test_conv2d():
- if not tvm.runtime.enabled("metal"):
- print("skip because %s is not enabled..." % "metal")
- return
n = 1
h = 14
w = 14
import pytest
+@tvm.testing.requires_llvm
def test_fully_connected_inference():
n = 1024
l = 128
s = te.create_schedule(D.op)
def verify(target="llvm"):
- if not tvm.runtime.enabled(target):
- pytest.skip("%s is not enabled..." % target)
if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
pytest.skip("extern function is not available")
if not nnpack.is_available():
nb[n, f] += out[::stride, ::stride]
return nb
+@tvm.testing.requires_llvm
def test_convolution_inference():
BATCH = 8
IH = 48
def verify(target="llvm",
algorithm=nnpack.ConvolutionAlgorithm.AUTO,
with_bias=True):
- if not tvm.runtime.enabled(target):
- pytest.skip("%s is not enabled..." % target)
if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
pytest.skip("extern function is not available")
if not nnpack.is_available():
verify(algorithm=algorithm, with_bias=with_bias)
+@tvm.testing.requires_llvm
def test_convolution_inference_without_weight_transform():
BATCH = 6
IH = 48
def verify(target="llvm",
algorithm=nnpack.ConvolutionAlgorithm.AUTO,
with_bias=True):
- if not tvm.runtime.enabled(target):
- pytest.skip("%s is not enabled..." % target)
if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
pytest.skip("extern function is not available")
if not nnpack.is_available():
import numpy as np
from tvm.contrib import random
from tvm import rpc
-
-def enabled_ctx_list():
- ctx_list = [('cpu', tvm.cpu(0)),
- ('gpu', tvm.gpu(0)),
- ('cl', tvm.opencl(0)),
- ('metal', tvm.metal(0)),
- ('rocm', tvm.rocm(0)),
- ('vulkan', tvm.vulkan(0)),
- ('vpi', tvm.vpi(0))]
- for k, v in ctx_list:
- assert tvm.context(k, 0) == v
- ctx_list = [x[1] for x in ctx_list if x[1].exist]
- return ctx_list
-
-ENABLED_CTX_LIST = enabled_ctx_list()
+import tvm.testing
def test_randint():
m = 10240
s = te.create_schedule(A.op)
def verify(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("skip because %s is not enabled..." % target)
return
if not tvm.get_global_func("tvm.contrib.random.randint", True):
s = te.create_schedule(A.op)
def verify(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("skip because %s is not enabled..." % target)
return
if not tvm.get_global_func("tvm.contrib.random.uniform", True):
s = te.create_schedule(A.op)
def verify(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("skip because %s is not enabled..." % target)
return
if not tvm.get_global_func("tvm.contrib.random.normal", True):
assert abs(np.std(na) - 4) < 1e-2
verify()
+@tvm.testing.uses_gpu
def test_random_fill():
def test_local(ctx, dtype):
if not tvm.get_global_func("tvm.contrib.random.random_fill", True):
if not tvm.get_global_func("tvm.contrib.random.random_fill", True):
print("skip because extern function is not available")
return
- if not tvm.runtime.enabled("rpc") or not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("rpc") or not tvm.runtime.enabled("llvm"):
return
np_ones = np.ones((512, 512), dtype=dtype)
server = rpc.Server("localhost")
for dtype in ["bool", "int8", "uint8", "int16", "uint16", "int32", "int32",
"int64", "uint64", "float16", "float32", "float64"]:
- for ctx in ENABLED_CTX_LIST:
+ for _, ctx in tvm.testing.enabled_targets():
test_local(ctx, dtype)
test_rpc(dtype)
test_uniform()
test_normal()
test_random_fill()
+
import numpy as np
from tvm.contrib import rocblas
+@tvm.testing.requires_rocm
def test_matmul_add():
n = 1024
l = 128
s = te.create_schedule(C.op)
def verify(target="rocm"):
- if not tvm.runtime.enabled(target):
- print("skip because %s is not enabled..." % target)
- return
if not tvm.get_global_func("tvm.contrib.rocblas.matmul", True):
print("skip because extern function is not available")
return
import tvm
from tvm import te
from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
from tvm import relay
from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19
from caffe2.python import workspace, core
from caffe2.proto import caffe2_pb2
from collections import namedtuple
+import tvm.testing
def get_tvm_output(model,
dtype = 'float32'
data = np.random.uniform(size=data_shape).astype(dtype)
c2_out = get_caffe2_output(model, data, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, data, target, ctx, out_shape, dtype)
tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_squeezenet1_1():
verify_caffe2_forward_impl(c2_squeezenet, (1, 3, 224, 224), (1, 1000, 1, 1))
+@tvm.testing.uses_gpu
def test_forward_resnet50():
verify_caffe2_forward_impl(c2_resnet50, (1, 3, 224, 224), (1, 1000))
+@tvm.testing.uses_gpu
def test_forward_vgg19():
verify_caffe2_forward_impl(c2_vgg19, (1, 3, 224, 224), (1, 1000))
Model = namedtuple('Model', ['init_net', 'predict_net'])
+@tvm.testing.uses_gpu
def test_elementwise_add():
data_shape = (1, 16, 9, 9)
init_net = caffe2_pb2.NetDef()
verify_caffe2_forward_impl(model, data_shape, data_shape)
+@tvm.testing.uses_gpu
def test_elementwise_add_with_broadcast():
data_shape = (1, 16, 9, 9)
init_net = caffe2_pb2.NetDef()
verify_caffe2_forward_impl(model, data_shape, data_shape)
+@tvm.testing.uses_gpu
def test_normalize_yuv():
data_shape = (1, 3, 96, 96)
init_net = caffe2_pb2.NetDef()
from tvm import topi
import tvm.topi.testing
from tvm import relay
-from tvm.relay.testing.config import ctx_list
from tvm.topi.testing import conv2d_nchw_python
import coremltools as cm
import model_zoo
+import tvm.testing
def get_tvm_output(func, x, params, target, ctx,
out_shape=(1, 1000), input_name='image', dtype='float32'):
shape_dict = {input_name : x.shape}
# Some Relay passes change operators on the fly. Ensuring that we generate
# new graph for each target.
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
mod, params = relay.frontend.from_coreml(model, shape_dict)
tvm_output = get_tvm_output(mod["main"], x, params, target, ctx)
print(target, ctx, model_name, 'prediction id: ', np.argmax(tvm_output.flat))
+@tvm.testing.uses_gpu
def test_mobilenet_checkonly():
model_file = model_zoo.get_mobilenet()
run_model_checkonly(model_file, 'mobilenet')
+@tvm.testing.uses_gpu
def test_resnet50_checkonly():
model_file = model_zoo.get_resnet50()
run_model_checkonly(model_file, 'resnet50')
output_name='output',
mode='ADD')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_AddLayerParams():
verify_AddLayerParams((1, 2, 2), 0)
verify_AddLayerParams((1, 2, 2), 1)
output_name='output',
mode='MULTIPLY')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_MultiplyLayerParams():
verify_MultiplyLayerParams((1, 2, 2), 0)
verify_MultiplyLayerParams((1, 2, 2), 1)
output_name='output',
mode='CONCAT')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_ConcatLayerParams():
verify_ConcatLayerParams((1, 1, 2, 2), (1, 2, 2, 2))
verify_ConcatLayerParams((1, 2, 4, 4), (1, 3, 4, 4))
output_name='output')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, a_np, 'input', b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_UpsampleLayerParams():
verify_UpsampleLayerParams((1, 16, 32, 32), 2, 'NN')
verify_UpsampleLayerParams((1, 4, 6, 6), 3, 'BILINEAR')
builder.add_l2_normalize(name='L2', epsilon=eps, input_name='input', output_name='output')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, a_np, 'input', b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_l2_normalize():
verify_l2_normalize((1, 3, 20, 20), 0.001)
local_size=size)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, a_np, 'input', b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_lrn():
verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5)
output_name='output',
mode='AVE')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np1, a_np2], ['input1', 'input2'], b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_average():
verify_average((1, 3, 20, 20), (1, 3, 20, 20))
verify_average((3, 20, 20), (1, 3, 20, 20))
output_name='output',
mode='MAX')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np1, a_np2, a_np3],
['input1', 'input2', 'input3'], b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_max():
verify_max((1, 3, 20, 20))
verify_max((20, 20))
output_name='output',
mode='MIN')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np1, a_np2, a_np3],
['input1', 'input2', 'input3'], b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_min():
verify_min((1, 3, 20, 20))
verify_min((20, 20))
mode='sqrt')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
epsilon=epsilon)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
epsilon=epsilon)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
alpha=alpha)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
mode='exp')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
mode='log')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
mode='abs')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
alpha=alpha)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_unary():
verify_unary_sqrt((1, 3, 20, 20))
verify_unary_rsqrt((1, 3, 20, 20))
verify_unary_threshold((1, 3, 20, 20), alpha=5.0)
+@tvm.testing.uses_gpu
def test_forward_reduce():
from enum import Enum
class ReduceAxis(Enum):
mode=mode)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5, atol=1e-5)
mode=mode)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], ref_val.shape, dtype)
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
output_names=output_names)
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input'], output_shapes, [dtype] * len(output_shapes))
tvm.testing.assert_allclose(out, ref_val, rtol=1e-5)
builder.add_elementwise(name='add', input_names=['input1', 'input2'],
output_name='output', alpha=0, mode='ADD')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np, a_np],
['input1', 'input2'], b_np.shape, dtype)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_image_scaler():
verify_image_scaler((3, 224, 224), image_scale=0.17)
verify_image_scaler((3, 224, 224),
input_name='input1',
output_name='output')
model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
out = run_tvm_graph(model, target, ctx, [a_np],
['input1'], output_shape=None)
tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_convolution():
verify_convolution((1, 3, 224, 224), filter=(32, 3, 3, 3), padding='VALID')
verify_convolution((1, 3, 224, 224), filter=(32, 3, 3, 3), padding='SAME')
from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
import keras
+import tvm.testing
try:
import tensorflow.compat.v1 as tf
xs = [np.random.uniform(size=shape, low=-1.0, high=1.0) for shape in in_shapes]
keras_out = get_keras_output(xs)
keras_out = keras_out if isinstance(keras_out, list) else [keras_out]
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
inputs = [to_channels_first(x) for x in xs] if need_transpose else xs
tvm_out = get_tvm_output(inputs, target, ctx)
for kout, tout in zip(keras_out, tvm_out):
tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
class TestKeras:
scenarios = [using_classic_keras, using_tensorflow_keras]
import tvm
from tvm import te
from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
from tvm import relay
import mxnet as mx
from mxnet import gluon
from mxnet.gluon.model_zoo import vision
-import model_zoo
import random
import pytest
+import model_zoo
+
+import tvm.testing
def verify_mxnet_frontend_impl(mx_symbol,
data_shape=(1, 3, 224, 224),
x = np.random.uniform(size=data_shape)
if gluon_impl:
gluon_out, gluon_sym = get_gluon_output(name, x)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(gluon_sym, x, None, None, target, ctx, dtype)
tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5)
else:
mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype)
assert "data" not in args
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, ctx, dtype)
tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_mlp():
mlp = model_zoo.mx_mlp()
verify_mxnet_frontend_impl(mlp,
data_shape=(1, 1, 28, 28),
out_shape=(1, 10))
+@tvm.testing.uses_gpu
def test_forward_vgg():
for n in [11]:
mx_sym = model_zoo.mx_vgg(n)
verify_mxnet_frontend_impl(mx_sym)
+@tvm.testing.uses_gpu
def test_forward_resnet():
for n in [18]:
mx_sym = model_zoo.mx_resnet(18)
verify_mxnet_frontend_impl(mx_sym)
+@tvm.testing.uses_gpu
def test_forward_leaky_relu():
data = mx.sym.var('data')
data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
mx_sym = mx.sym.LeakyReLU(data, act_type='leaky')
verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_elu():
data = mx.sym.var('data')
data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
mx_sym = mx.sym.LeakyReLU(data, act_type='elu')
verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_rrelu():
data = mx.sym.var('data')
data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
mx_sym = mx.sym.LeakyReLU(data, act_type='rrelu', lower_bound=0.3, upper_bound=0.7)
verify_mxnet_frontend_impl(mx_sym[0], (1, 3, 100, 100), (1, 6, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_prelu():
data = mx.sym.var('data')
data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
mx_sym = mx.sym.LeakyReLU(data, act_type='prelu')
verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_gelu():
data = mx.sym.var('data')
data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
mx_sym = mx.sym.LeakyReLU(data, act_type='gelu')
verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_softrelu():
data = mx.sym.var('data')
data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
mx_sym = mx.sym.Activation(data, act_type='softrelu')
verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_fc_flatten():
# test flatten=True option in mxnet 0.11.1
data = mx.sym.var('data')
except:
pass
+@tvm.testing.uses_gpu
def test_forward_clip():
data = mx.sym.var('data')
data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
mx_sym = mx.sym.clip(data, a_min=0, a_max=1)
verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_split():
data = mx.sym.var('data')
mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=False)
verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 1, 2, 1))
+@tvm.testing.uses_gpu
def test_forward_split_squeeze():
data = mx.sym.var('data')
mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=True)
verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 2, 1))
+@tvm.testing.uses_gpu
def test_forward_expand_dims():
data = mx.sym.var('data')
mx_sym = mx.sym.expand_dims(data, axis=1)
verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 1, 3, 4))
+@tvm.testing.uses_gpu
def test_forward_pooling():
data = mx.sym.var('data')
mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='avg')
mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='max')
verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
+@tvm.testing.uses_gpu
def test_forward_pooling3d():
data = mx.sym.var('data')
mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type='avg')
mx_sym = mx.sym.Pooling(data, kernel=(3, 3, 3), pad=(1, 1, 1), pool_type='max')
verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8, 8), (1, 20, 8, 8, 8))
+@tvm.testing.uses_gpu
def test_forward_adaptive_pooling():
data = mx.sym.var('data')
mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(1,))
mx_sym = mx.sym.contrib.AdaptiveAvgPooling2D(data, output_size=(3, 3))
verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 3, 3))
+@tvm.testing.uses_gpu
def test_forward_lrn():
data = mx.sym.var('data')
mx_sym = mx.sym.LRN(data, alpha=2, beta=2, knorm=1, nsize=5)
verify_mxnet_frontend_impl(mx_sym, (1, 10, 24, 24), (1, 10, 24, 24))
+@tvm.testing.uses_gpu
def test_forward_ones():
data = mx.sym.var('data')
ones = mx.sym.ones(shape=(2, 3, 4), dtype='float32')
mx_sym = mx.sym.elemwise_add(data, ones)
verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
+@tvm.testing.uses_gpu
def test_forward_zeros():
data = mx.sym.var('data')
zeros = mx.sym.zeros(shape=(2, 3, 4), dtype='float32')
mx_sym = mx.sym.elemwise_add(data, zeros)
verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
+@tvm.testing.uses_gpu
def test_forward_ones_like():
data = mx.sym.var('data')
mx_sym = mx.sym.ones_like(data, dtype='float32')
verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
+@tvm.testing.uses_gpu
def test_forward_make_loss():
data = mx.sym.var('data')
ones = mx.sym.ones(shape=(2, 3, 4), dtype='float32')
mx_sym = mx.sym.make_loss((data-ones)**2/2, dtype='float32')
verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
+@tvm.testing.uses_gpu
def test_forward_zeros_like():
data = mx.sym.var('data')
mx_sym = mx.sym.zeros_like(data, dtype='float32')
verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
+@tvm.testing.uses_gpu
def test_forward_argmax():
data = mx.sym.var('data')
mx_sym = mx.sym.argmax(data, axis=1)
verify_mxnet_frontend_impl(mx_sym, (5, 3), (5,))
+@tvm.testing.uses_gpu
def test_forward_argmin():
data = mx.sym.var('data')
mx_sym = mx.sym.argmin(data, axis=0)
verify_mxnet_frontend_impl(mx_sym, (5, 4), (4,))
+@tvm.testing.uses_gpu
def test_forward_slice():
data = mx.sym.var('data')
mx_sym = mx.sym.slice(data, begin=(0, 1), end=(2, 4))
mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2))
verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2))
+@tvm.testing.uses_gpu
def test_forward_where():
cond = mx.sym.var('cond')
x = mx.sym.var('x')
mx_out = mx.nd.where(mx_cond, mx_x, mx_y).asnumpy()
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, args, auxs)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(np_cond, np_x, np_y)
tvm.testing.assert_allclose(op_res.asnumpy(), mx_out)
+@tvm.testing.uses_gpu
def test_forward_arange():
def _mx_symbol(F, start, stop, step):
if start is None and step is None:
ref_res = _mx_symbol(mx.nd, start, stop, step).asnumpy()
mx_sym = _mx_symbol(mx.sym, start, stop, step)
mod, _ = relay.frontend.from_mxnet(mx_sym, {})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()()
op = getattr(F, op_name)
return op(*inputs)
+@tvm.testing.uses_gpu
def test_forward_broadcast_ops():
for op in ["broadcast_add",
"broadcast_plus",
ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), mx.nd.array(b_np)])
shapes = {'a': a_shape, 'b': b_shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np, b_np)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
+@tvm.testing.uses_gpu
def test_forward_elemwise_ops():
for op in ["elemwise_add", "elemwise_sub", "elemwise_mul",
"elemwise_div", "maximum", "minimum",
ref_res = op(mx.nd.array(a_np), mx.nd.array(b_np))
shapes = {'a': shape, 'b': shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np, b_np)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
+@tvm.testing.uses_gpu
def test_forward_softmin():
data = mx.sym.var('data')
mx_sym = mx.sym.softmin(data)
verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 3, 100, 100))
+@tvm.testing.uses_gpu
def test_forward_unary_ops():
for op in ["abs", "sqrt", "ceil", "floor", "round", "reciprocal", "trunc",
"softsign", "hard_sigmoid",
ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np)])
shapes = {'a': shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy(), rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_scalar_ops():
for op in [operator.add, operator.sub, operator.mul, operator.truediv,
operator.pow, operator.lt, operator.le, operator.eq,
ref_res = op(mx.nd.array(a_np), b_scalar)
shapes = {'a': a_shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np)
ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(a_np), b_scalar])
shapes = {'a': a_shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
+@tvm.testing.uses_gpu
def test_forward_slice_axis():
def verify(shape, axis, begin, end):
data_np = np.random.uniform(size=shape).astype("float32")
ref_res = mx.nd.slice_axis(mx.nd.array(data_np), axis, begin, end)
mx_sym = mx.sym.slice_axis(mx.sym.var("data"), axis, begin, end)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(data_np)
verify((3, 4), 1, -3, -1)
verify((3, 4), -1, -3, -1)
+@tvm.testing.uses_gpu
def test_forward_slice_like():
def verify(x_shape, y_shape, axes):
x_np = np.random.uniform(size=x_shape).astype("float32")
ref_res = mx.nd.slice_like(mx.nd.array(x_np), mx.nd.array(y_np), axes=axes)
mx_sym = mx.sym.slice_like(mx.sym.var("x"), mx.sym.var("y"), axes=axes)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": x_shape, "y": y_shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np, y_np)
verify((3, 4), (2, 3), (0))
verify((3, 4), (2, 3), (-1))
+@tvm.testing.uses_gpu
def test_forward_sequence_reverse():
def verify(shape, seq_lengths, use_seq_lengths, seq_axis):
data_np = np.random.uniform(size=shape).astype("float32")
mx_sym = mx.sym.SequenceReverse(*mx_sym_args)
mod, _ = relay.frontend.from_mxnet(mx_sym, *from_mxnet_args)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(*in_data)
# MXNet accepts axis value as 0 only
# verify((3, 4, 5, 6), None, False, 2)
+@tvm.testing.uses_gpu
def test_forward_l2_normalize():
data = mx.sym.var('data')
mx_sym = mx.sym.L2Normalization(data, mode="channel")
verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5))
+@tvm.testing.uses_gpu
def test_forward_shape_array():
def verify(shape):
x_np = np.random.uniform(size=shape).astype("float32")
ref_res = mx.nd.shape_array(mx.nd.array(x_np))
mx_sym = mx.sym.shape_array(mx.sym.var("x"))
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np)
verify((3, 4, 5))
verify((3, 4, 5, 6))
+@tvm.testing.uses_gpu
def test_forward_squeeze():
def verify(shape, axis):
x_np = np.random.uniform(size=shape).astype("float32")
ref_res = mx.nd.squeeze(mx.nd.array(x_np), axis=axis)
mx_sym = mx.sym.squeeze(mx.sym.var("x"), axis=axis)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np)
verify((1, 3, 1), 2)
verify((1, 3, 1), (0, 2))
+@tvm.testing.uses_gpu
def test_forward_broadcast_axis():
def verify(shape, axis, size):
x_np = np.random.uniform(size=shape).astype("float32")
mx_sym = _mx_symbol(mx.sym, op, [mx.sym.var('x'),axis,size])
ref_res = _mx_symbol(mx.nd, op, [mx.nd.array(x_np),axis,size])
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np)
verify((1, 2, 1), (0, 2), (2, 3))
+@tvm.testing.uses_gpu
def test_forward_broadcast_to():
def verify(input_shape, shape):
x_np = np.random.uniform(size=input_shape).astype("float32")
ref_res = mx.nd.broadcast_to(mx.nd.array(x_np), shape=shape)
mx_sym = mx.sym.broadcast_to(mx.sym.var("x"), shape=shape)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": input_shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np)
verify((4, 1, 32, 32), (4, 8, 32, 32))
+@tvm.testing.uses_gpu
def test_forward_logical_not():
a_shape = (3, 4, 5)
dtype = 'float32'
ref_res = mx.nd.logical_not(mx.nd.array(a_np))
shapes = {'a': a_shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy())
+@tvm.testing.uses_gpu
def test_forward_full():
def verify(val, shape, dtype):
ctx = mx.cpu()
ref_res = mx.nd.full(shape, val, dtype=dtype)
mx_sym = mx.sym.full(shape, val, dtype=dtype)
mod, _ = relay.frontend.from_mxnet(mx_sym, {})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
# Skip testing graph runtime because this op will be optimized out
# by constant folding.
for kind in ["debug"]:
verify(2, (3, 4), "int32")
verify(3.5, (1, 3, 4), "float32")
+@tvm.testing.uses_gpu
def test_forward_embedding():
def verify(data_shape, weight_shape):
in_dim, out_dim = weight_shape
input_dim=in_dim, output_dim=out_dim)
mod, _ = relay.frontend.from_mxnet(
mx_sym, {"x": data_shape, "w": weight_shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x=x_np, w=w_np)
verify((2, 2), (4, 5))
verify((2, 3, 4), (4, 5))
+@tvm.testing.uses_gpu
def test_forward_smooth_l1():
data = mx.sym.var('data')
mx_sym = mx.sym.smooth_l1(data)
mx_sym = mx.sym.smooth_l1(data, scalar=1.0)
verify_mxnet_frontend_impl(mx_sym, (3, 4), (3, 4))
+@tvm.testing.uses_gpu
def test_forward_take():
def verify(shape, indices_src, axis, mode="clip"):
x_np = np.random.uniform(size=shape).astype("float32")
ref_res = mx.nd.take(mx.nd.array(x_np), mx.nd.array(indices_np), axis, mode)
mx_sym = mx.sym.take(mx.sym.var("x"), mx.sym.var("y"), axis, mode)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape, "y": indices_np.shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np, indices_np)
verify((3,4), [-1, 5], 1)
verify((3,4), [-1, 5], 1, mode="wrap")
+@tvm.testing.uses_gpu
def test_forward_gather_nd():
def verify(xshape, yshape, y_data, error=False):
x_data = np.random.uniform(size=xshape).astype("float32")
ref_res = mx.nd.gather_nd(mx.nd.array(x_data), mx.nd.array(y_data))
mx_sym = mx.sym.gather_nd(mx.sym.var("x_data"), mx.sym.var("y_data"))
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x_data": xshape, "y_data": yshape}, {"x_data": "float32", "y_data": "int32"})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_data, y_data)
verify((3, 2), (2, 2, 3), [[[0, 1, 2], [2, 0, 1]], [[0, 0, 0], [1, 1, 1]]])
verify((1, 4), (1, 1), [[0]])
+@tvm.testing.uses_gpu
def test_forward_bilinear_resize():
# add tests including scale_height and scale_width when mxnet is updated to version 1.5
data = mx.sym.var('data')
mx_sym = mx.sym.contrib.BilinearResize2D(data, height=5, width=10)
verify_mxnet_frontend_impl(mx_sym, (1, 2, 3, 4), (1, 2, 5, 10))
+@tvm.testing.uses_gpu
def test_forward_grid_generator():
def verify(shape, transform_type, target_shape):
x = np.random.uniform(size=shape).astype("float32")
mx_sym = mx.sym.GridGenerator(mx.sym.var("x"), transform_type, target_shape)
shape_dict = {"x": x.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(
kind, mod=mod, ctx=ctx, target=target)
verify((4, 2, 16, 16), 'warp', None)
verify((1, 2, 16, 16), 'warp', None)
+@tvm.testing.uses_gpu
def test_forward_bilinear_sampler():
def verify(data_shape, grid_shape):
data = np.random.uniform(size=data_shape).astype("float32")
mx_sym = mx.sym.BilinearSampler(mx.sym.var("data"), mx.sym.var("grid"))
shape_dict = {"data": data.shape, "grid": grid.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(
kind, mod=mod, ctx=ctx, target=target)
verify((4, 4, 16, 32), (4, 2, 8, 8))
verify((4, 4, 16, 32), (4, 2, 32, 32))
+@tvm.testing.uses_gpu
def test_forward_rnn_layer():
def verify(mode, seq_len, input_size, hidden_size, num_layers,
batch=1, init_states=True, bidirectional=False):
mod, params = relay.frontend.from_mxnet(
mx_sym, shape=shape_dict, arg_params=mx_params)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
# only test graph runtime because debug runtime is too slow
for kind in ["graph"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
# verify(mode, 10, 64, 64, 3, init_states=False)
# verify(mode, 10, 64, 64, 3, batch=2, bidirectional=True, init_states=False)
+@tvm.testing.uses_gpu
def test_forward_Crop():
def verify(xshape, yshape, offset=None):
x_data = np.random.uniform(size=xshape).astype("float32")
mx_sym = mx.sym.Crop(mx.sym.var("x"), mx.sym.var("y"), offset=offset)
ref_res = mx.nd.Crop(mx.nd.array(x_data), mx.nd.array(y_data), offset=offset)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": xshape, "y": yshape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
if offset is None or offset == (0, 0):
verify((5, 32, 40, 40), (5, 32, 25, 25))
verify((5, 32, 40, 40), (5, 32, 25, 25), (5, 5))
+@tvm.testing.uses_gpu
def test_forward_argsort():
def verify(shape, axis, is_ascend, dtype="float32"):
x_np = np.random.uniform(size=shape).astype("float32")
ref_res = mx.nd.argsort(mx.nd.array(x_np), axis=axis, is_ascend=is_ascend, dtype=dtype)
mx_sym = mx.sym.argsort(mx.sym.var("x"), axis=axis, is_ascend=is_ascend, dtype=dtype)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np)
verify((1, 4, 6), axis=1, is_ascend=True)
verify((3, 5, 6), axis=-3, is_ascend=False, dtype="int32")
+@tvm.testing.uses_gpu
def test_forward_topk():
def verify(shape, k, axis, ret_type, is_ascend=False, dtype="float32"):
x_np = np.random.uniform(size=shape).astype("float32")
mx_sym = mx.sym.topk(mx.sym.var("x"), k=k, axis=axis, ret_typ=ret_type,
is_ascend=is_ascend, dtype=dtype)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np)
verify((3, 5, 6), k=2, axis=1, ret_type="value", is_ascend=True)
verify((3, 5, 6), k=0, axis=2, ret_type="both", dtype="int32")
+@tvm.testing.uses_gpu
def test_forward_sequence_mask():
def verify(shape, use_sequence_length, value, axis, dtype, itype):
data_np = np.random.uniform(size=shape).astype(dtype)
value=value,
axis=axis)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": shape}, dtype={"data": dtype})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ['graph', 'debug']:
if use_sequence_length is False and kind == 'graph':
# Disable the test for 'graph' when it's identity.
verify((5, 4, 3), False, 1.0, 1, 'float64', 'float64')
verify((5, 4, 3, 2), True, 1.0, 0, 'float32', 'float32')
+@tvm.testing.uses_gpu
def test_forward_contrib_div_sqrt_dim():
def verify(shape):
x_np = np.random.uniform(size=shape).astype("float32")
ref_res = mx.nd.contrib.div_sqrt_dim(mx.nd.array(x_np))
mx_sym = mx.sym.contrib.div_sqrt_dim(mx.sym.var("x"))
mod, _ = relay.frontend.from_mxnet(mx_sym, {"x": shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x_np)
verify((3, 4))
verify((3, 4, 5))
+@tvm.testing.uses_gpu
def test_forward_batch_norm():
def verify(shape, axis=1, fix_gamma=False):
x = np.random.uniform(size=shape).astype("float32")
"mean": moving_mean.shape, "var": moving_var.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
#print(mod)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x, gamma, beta, moving_mean, moving_var)
verify((2, 3, 4, 5), fix_gamma=True)
+@tvm.testing.uses_gpu
def test_forward_instance_norm():
def verify(shape, axis=1, epsilon=1e-5):
x = np.random.uniform(size=shape).astype("float32")
mx_sym = mx.sym.InstanceNorm(mx.sym.var("x"), mx.sym.var("gamma"), mx.sym.var("beta"), epsilon)
shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x, gamma, beta)
verify((8, 7, 6, 5, 4))
+@tvm.testing.uses_gpu
def test_forward_layer_norm():
def verify(shape, axis=-1):
x = np.random.uniform(size=shape).astype("float32")
mx.sym.var("beta"), axis=axis)
shape_dict = {"x": x.shape, "gamma": gamma.shape, "beta": beta.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x, gamma, beta)
verify((2, 5), axis=0)
verify((2, 5, 6))
+@tvm.testing.uses_gpu
def test_forward_one_hot():
def verify(indices_shape, depth, on_value, off_value, dtype):
x = np.random.randint(0, 5, size=indices_shape)
mx_sym = mx.sym.one_hot(mx.sym.var("x"), depth, on_value, off_value, dtype)
shape_dict = {"x": x.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x.astype("float32"))
verify((3, 2, 4, 5), 6, 1, 0, "int32")
verify((3, 2, 4, 5), 6, 1.0, 0.0, "float32")
+@tvm.testing.uses_gpu
def test_forward_pad():
def verify(data_shape, out_shape, mode, pad_width, constant_value=0.0):
data = mx.sym.var('data')
pad_width=(0,0,0,0,1,2,3,4,5,6))
+@tvm.testing.uses_gpu
def test_forward_slice():
def verify(data_shape, out_shape, begin, end):
data = mx.sym.var('data')
verify(data_shape=(1,1,10), out_shape=(1,1,8), begin=(None, None, 2), end=(None, None, None))
+@tvm.testing.uses_gpu
def test_forward_convolution():
def verify(data_shape, kernel_size, stride, pad, num_filter, is_depthwise=False):
if is_depthwise:
pad=pad, num_filter=num_filter, num_group=groups)
shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x, weight, bias)
verify(data_shape=(1, 8, 16, 16, 16), kernel_size=(3, 3, 3), stride=(2, 2, 2), pad=(1, 1, 1), num_filter=2)
verify(data_shape=(20, 8, 16, 16, 16), kernel_size=(3, 3, 3), stride=(1, 1, 1), pad=(1, 1, 1), num_filter=2)
+@tvm.testing.uses_gpu
def test_forward_deconvolution():
def verify(data_shape, kernel_size, stride, pad, num_filter):
weight_shape=(data_shape[1], num_filter) + kernel_size
pad=pad, num_filter=num_filter, no_bias=False)
shape_dict = {"x": x.shape, "weight": weight.shape, "bias": bias.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x, weight, bias)
verify(data_shape=(1, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
verify(data_shape=(20, 8, 32, 32), kernel_size=(3, 3), stride=(1, 1), pad=(1, 1), num_filter=2)
+@tvm.testing.uses_gpu
def test_forward_cond():
def verify(a_np, b_np):
a_nd, b_nd = mx.nd.array(a_np), mx.nd.array(b_np)
shape_dict = {"a": a_np.shape, "b": b_np.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["debug", "vm"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np, b_np)
verify(np.asarray([1.0], 'float32'), np.asarray([2.0],'float32'))
verify(np.asarray([4.0], 'float32'), np.asarray([3.0],'float32'))
+@tvm.testing.uses_gpu
def test_forward_amp_cast():
def verify(from_dtype, to_dtype):
from_np = np.random.uniform(size=(1,3,18)).astype(from_dtype)
shape_dict = {'x': (1,3,18)}
dtype_dict = {'x': from_dtype}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "vm", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(from_np)
verify('float32', 'float16')
verify('float16', 'float32')
+@tvm.testing.uses_gpu
def test_forward_amp_multicast():
def verify(dtypes, cast_narrow, expected_dtype):
x_nps = [np.random.uniform(size=(1,3,18)).astype(dtype) for dtype in dtypes]
shape_dict[str(i)] = (1,3,18)
dtype_dict[str(i)] = dtype
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "vm", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(*x_nps)
verify(['float16', 'float16'], True, 'float16')
+@tvm.testing.uses_gpu
def test_forward_unravel_index():
def verify(x, shape, dtype):
a_np = np.array(x).astype(dtype)
shapes = {'a': a_np.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shapes, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "vm", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(a_np)
# verify([0, 1, 2, 5], [2, 2], dtype)
+@tvm.testing.uses_gpu
def test_forward_swap_axis():
def _verify_swap_axis(in_shape, out_shape, dim1, dim2):
data = mx.sym.var('data')
# _verify_swap_axis((4, 5), (5, 4), 0, 0)
+@tvm.testing.uses_gpu
def test_forward_depth_to_space():
def verify(shape, blocksize=2):
x = np.random.uniform(size=shape).astype("float32")
mx_sym = mx.sym.depth_to_space(mx.sym.var("x"), blocksize)
shape_dict = {"x": x.shape, }
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x)
verify((1, 18, 3, 3), 3)
+@tvm.testing.uses_gpu
def test_forward_space_to_depth():
def verify(shape, blocksize=2):
x = np.random.uniform(size=shape).astype("float32")
mx_sym = mx.sym.space_to_depth(mx.sym.var("x"), blocksize)
shape_dict = {"x": x.shape, }
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(x)
verify((1, 1, 9, 9), 3)
+@tvm.testing.uses_gpu
def test_forward_correlation():
def verify(data_shape, kernel_size, max_displacement, stride1, stride2, pad_size,
is_multiply):
is_multiply=is_multiply)
shape_dict = {"data1": data1.shape, "data2": data2.shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(data1, data2)
verify((5, 1, 11, 11), kernel_size = 5, max_displacement = 1, stride1 = 1, stride2 = 1, pad_size = 2, is_multiply = False)
+@tvm.testing.uses_gpu
def test_forward_arange_like():
def verify(data_shape, start=None, step=None, axis=None):
attrs = {}
mx_sym = mx.sym.contrib.arange_like(data, **attrs)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()()
verify(data_shape=(3, 4, 5), start=2., step=3., axis=1)
+@tvm.testing.uses_gpu
def test_forward_interleaved_matmul_selfatt_qk():
def verify(batch, seq_length, num_heads, head_dim):
data_shape = (seq_length, batch, num_heads * head_dim * 3)
mx_sym = mx.sym.contrib.interleaved_matmul_selfatt_qk(data, heads=num_heads)
mod, _ = relay.frontend.from_mxnet(mx_sym, {"data": data_shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(data_np)
verify(3, 10, 6, 8)
+@tvm.testing.uses_gpu
def test_forward_interleaved_matmul_selfatt_valatt():
def verify(batch, seq_length, num_heads, head_dim):
data_shape = (seq_length, batch, num_heads * head_dim * 3)
data, weight, heads=num_heads)
mod, _ = relay.frontend.from_mxnet(
mx_sym, {"data": data_shape, "weight": weight_shape})
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(data=data_np, weight=weight_np)
verify(3, 10, 6, 8)
+@tvm.testing.uses_gpu
def test_forward_box_decode():
def verify(data_shape, anchor_shape, stds=[1, 1, 1, 1], clip=-1, in_format="corner"):
dtype = "float32"
mx_sym = mx.sym.contrib.box_decode(mx.sym.var("data"), mx.sym.var("anchors"), stds[0], stds[1], stds[2], stds[3], clip, in_format)
shape_dict = {"data": data_shape, "anchors": anchor_shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
op_res = intrp.evaluate()(data, anchors)
verify((1, 10, 4), (1, 10, 4), in_format="center")
+@tvm.testing.uses_gpu
def test_forward_softmax():
def verify(data_shape, axis, use_length, length):
dtype = "float32"
shape_dict = {"data": data_shape}
mod, _ = relay.frontend.from_mxnet(mx_sym, shape_dict)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
if use_length:
@pytest.mark.parametrize("mode", ["constant", "edge", "reflect"])
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
@pytest.mark.parametrize("constant_value", [0.0, 3.0])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_npi_pad(data_shape, pad_width, mode, dtype, constant_value,target, ctx, kind):
data_np = np.random.uniform(size=data_shape).astype(dtype)
op_res = intrp.evaluate()(data_np)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.asnumpy(), rtol=1e-5)
-
+
@pytest.mark.skipif(not hasattr(mx.sym.np, 'pad'), reason="test'll abort with Mxnet 1.x, skip for now")
@pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2)])
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
@pytest.mark.parametrize("axes", [(1,0,2),None])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_npi_transpose(data_shape, axes, dtype,target, ctx, kind):
data_np = np.random.uniform(size=data_shape).astype(dtype)
[((2,2),(2,2),1),((2,4),(2,3),1),((1,3,2),(1,3,5),2),((1,3,3),(1,3,3),1),((1,3),(1,3),0)]
)
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_npi_concatenate(data_shape1, data_shape2, axis, dtype,target, ctx, kind):
data_np1 = np.random.uniform(size=data_shape1).astype(dtype)
@pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8)])
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_np_copy(data_shape,dtype,target, ctx, kind):
data_np = np.random.uniform(size=data_shape).astype(dtype)
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
@pytest.mark.parametrize("data_shape,out_shape,reverse",
[((2, 3, 8),(-2, -2, 2, -1),False),
@pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8),(2,2),(1,3)])
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_npi_binary(data_shape,dtype,target, ctx, kind):
ref_ops = [mx.np.power, mx.np.multiply, mx.np.add, mx.np.less]
@pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8),(2,2),(1,3)])
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("scalar", [1.0,2.0,3.0,4.0])
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_npi_binary_scalar(data_shape,dtype,scalar,target, ctx, kind):
@pytest.mark.parametrize("data_shape", [(2,2,2),(2,7,2),(2,2,2,1,2,3,1),(1,8),(2,2),(1,3)])
@pytest.mark.parametrize("dtype", ['float64', 'float32'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_npi_tanh(data_shape,dtype,target, ctx, kind):
data_np1 = np.random.uniform(size=data_shape).astype(dtype)
@pytest.mark.parametrize("data_dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
@pytest.mark.parametrize("cond_dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
@pytest.mark.parametrize("scalar", [1.0,2.0])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
def test_forward_npi_where_rscalar(data_shape,cond_dtype,data_dtype,scalar,target, ctx, kind):
if data_dtype == 'bool':
@pytest.mark.parametrize("dtype", ['float64', 'float32', 'int64', 'int32', 'bool'])
-@pytest.mark.parametrize("target, ctx", ctx_list())
+@tvm.testing.parametrize_targets
@pytest.mark.parametrize("kind", ["graph", "vm", "debug"])
@pytest.mark.parametrize("data_shape, axis, indices_or_sections, squeeze_axis",
[((3,2,1),1,2,False),((3,2,1),0,3,False),((3,2,1),0,3,True),((3,2,1),0,(1,2),False)])
from tvm import te
from tvm import relay
from tvm.contrib import graph_runtime
-from tvm.relay.testing.config import ctx_list
import scipy
+import tvm.testing
def get_input_data_shape_dict(graph_def, input_data):
x = np.random.uniform(size=data_shape)
model = onnx.load_model(graph_file)
c2_out = get_onnxruntime_output(model, x, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, x, target, ctx, out_shape, dtype)
tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_reshape():
in_shape = (4, 3, 3, 4)
ref_shape = (6, 2, 4, 3)
model = helper.make_model(graph, producer_name='reshape_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=in_shape).astype('int32')
tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
+@tvm.testing.uses_gpu
def test_expand():
def _test_expand(name, data, shape, ref_data):
model = helper.make_model(graph, producer_name=name)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, data, target, ctx, ref_data.shape, 'float32')
tvm.testing.assert_allclose(ref_data, tvm_out)
model = helper.make_model(graph, producer_name='depth_to_space_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=inshape).astype('float32')
tvm_out = get_tvm_output(model, x, target, ctx, outshape, 'float32')
onnx_out = get_onnxruntime_output(model, x, 'float32')
tvm.testing.assert_allclose(onnx_out, tvm_out)
+@tvm.testing.uses_gpu
def test_depth_to_space():
# The current onnx.checker uses the OpSet-1 version of DepthToSpace, which doesn't have a mode argument.
# TODO: add a mode argument so that both CRD mode and DCR mode can be tested.
model = helper.make_model(graph, producer_name='space_to_depth_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=inshape).astype('float32')
tvm_out = get_tvm_output(model, x, target, ctx, outshape, 'float32')
onnx_out = get_onnxruntime_output(model, x, 'float32')
tvm.testing.assert_allclose(onnx_out, tvm_out)
+@tvm.testing.uses_gpu
def test_space_to_depth():
verify_space_to_depth((1, 1, 4, 6), (1, 4, 2, 3), 2)
+@tvm.testing.uses_gpu
def test_shape():
in_shape = (4, 3, 3, 4)
ref_shape = (6, 2, 4, 3)
model = helper.make_model(graph, producer_name='shape_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=in_shape).astype('int32')
tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'int32')
model = helper.make_model(graph, producer_name='power_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [x, y], target, ctx, np_res.shape)
tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_power():
_test_power_iteration((1, 3), (1))
_test_power_iteration((2, 3), (2, 3))
_test_power_iteration((2, 3), (1, 3))
+@tvm.testing.uses_gpu
def test_squeeze():
in_shape = (1, 3, 1, 3, 1, 1)
out_shape = (3, 3)
model = helper.make_model(graph, producer_name='squeeze_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=in_shape).astype('float32')
tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
tvm.testing.assert_allclose(out_shape, tvm_out.shape)
+@tvm.testing.uses_gpu
def test_flatten():
in_shape = (1, 3, 4, 4)
model = helper.make_model(graph, producer_name='flatten_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=in_shape).astype('int32')
tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
+@tvm.testing.uses_gpu
def test_unsqueeze():
in_shape = (3, 3)
axis = (0, 3, 4)
model = helper.make_model(graph, producer_name='squeeze_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=in_shape).astype('float32')
tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
TensorProto.FLOAT, list(out_np.shape))])
model = helper.make_model(graph, producer_name='gather_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [x, indices], target, ctx, out_np.shape)
tvm.testing.assert_allclose(out_np, tvm_out)
+@tvm.testing.uses_gpu
def test_gather():
verify_gather((4,), [1], 0, 'int32')
verify_gather((1, 4), [0], 0, 'int32')
model = helper.make_model(graph, producer_name='scatter_test')
onnx_out = get_onnxruntime_output(model, [x, indices, updates])
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [x, indices, updates], target, ctx, onnx_out[0].shape)
tvm.testing.assert_allclose(onnx_out[0], tvm_out)
+@tvm.testing.uses_gpu
def test_scatter():
verify_scatter((4,), [1], 0)
verify_scatter((1, 4), [[0]], 0)
model = helper.make_model(graph, producer_name='slice_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, indata, target, ctx, outdata.shape, 'float32', opset=1)
initializer=initializer)
model = helper.make_model(graph, producer_name='slice_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model,
indata,
target,
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_slice():
x = np.random.randn(20, 10, 5).astype(np.float32)
_test_slice_iteration_v1(x, x[0:3, 0:10], starts=(0, 0), ends=(3, 10), axes=(0, 1))
model = helper.make_model(graph, producer_name=opname+'_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, indata, target, ctx, outdata.shape, dtype)
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_floor():
_test_onnx_op_elementwise((2, 4, 5, 6), np.floor,
{}, 'float32', 'Floor', {})
+@tvm.testing.uses_gpu
def test_ceil():
_test_onnx_op_elementwise((2, 4, 5, 6), np.ceil, {}, 'float32', 'Ceil', {})
+@tvm.testing.uses_gpu
def test_clip():
_test_onnx_op_elementwise((2, 4, 5, 6),
np.clip,
{'min': -1.0, 'max': 1.0})
-
+@tvm.testing.uses_gpu
def test_round():
_test_onnx_op_elementwise((2, 4, 5, 6), np.round, {}, 'float32', 'Round', {})
model = helper.make_model(graph, producer_name=opname+'_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, indata, target, ctx, outdata.shape, dtype)
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_isinf():
_test_finite_ops((2, 4, 5, 6), np.isinf, {}, 'float32', 'IsInf', {})
+@tvm.testing.uses_gpu
def test_isnan():
_test_finite_ops((2, 4, 5, 6), np.isnan, {}, 'float32', 'IsNaN', {})
TensorProto.FLOAT, list(out_np.shape))])
model = helper.make_model(graph, producer_name='gather_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [x, indices], target, ctx, out_np.shape)
tvm.testing.assert_allclose(out_np, tvm_out)
+@tvm.testing.uses_gpu
def test_gather_nd():
verify_gather_nd((2, 2), [[0,0],[1,1]], 'int32')
verify_gather_nd((3, 3, 3), [[0,1],[1,0]] , 'float32')
verify_gather_nd((4, 3, 5, 6), [[2, 1, 0, 0]], 'float32')
+@tvm.testing.uses_gpu
def test_onehot():
indices_shape = [10]
indices_array = np.random.randint(
model = helper.make_model(graph, producer_name="onehot_test")
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [indices_array], target, ctx, out_np.shape)
tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_matmul():
a_shape = (4, 3)
b_shape = (3, 4)
model = helper.make_model(graph, producer_name='matmul_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [a_array, b_array], target, ctx, out_np.shape)
tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
model = helper.make_model(graph, producer_name='matmul_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [a_array, b_array], target, ctx, out_np.shape)
tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_batch_matmul():
verify_batch_matmul((2, 3, 4, 3), (2, 3, 3, 4))
verify_batch_matmul((2, 4, 3), (3, 4))
py_out = in_array / ((bias + (alpha / nsize) * square_sum) ** beta)
return py_out
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
input_name = model.graph.input[0].name
py_out = _get_python_lrn()
tvm_out = get_tvm_output(
tvm.testing.assert_allclose(py_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_lrn():
verify_lrn((5, 5, 5, 5), 3, 'float32')
verify_lrn((5, 5, 5, 5), 3, 'float32', alpha=0.0002, beta=0.5, bias=2.0)
helper.make_tensor_value_info("beta", TensorProto.FLOAT, (shape[1],))],
outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, list(shape))])
model = helper.make_model(graph, producer_name='instance_norm_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [x, gamma, beta], target, ctx, shape, 'float32')
tvm.testing.assert_allclose(y, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_instance_norm():
verify_instance_norm((2, 3, 4, 5))
verify_instance_norm((32, 64, 80, 64))
model = helper.make_model(graph, producer_name='upsample_nearest_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, in_array, target, ctx, out_shape, 'float32')
tvm.testing.assert_allclose(out_array, tvm_out)
model = helper.make_model(graph, producer_name='upsample_nearest_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, in_array, target, ctx, out_shape, 'float32')
tvm.testing.assert_allclose(out_array, tvm_out)
model = helper.make_model(graph, producer_name='upsample_bilinear_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, in_array, target, ctx, out_shape, 'float32')
tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
model = helper.make_model(
graph, producer_name='upsample_bilinear_opset9_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, in_array, target, ctx, out_shape, 'float32')
tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
model = helper.make_model(
graph, producer_name='upsample_trilinear_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, in_array, target, ctx, out_shape, 'float32')
tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_upsample():
_test_upsample_nearest()
_test_upsample_bilinear()
model = helper.make_model(graph, producer_name=opname+'_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, indata, target, ctx, outshape, 'float32')
tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_softmax():
_test_softmax((1, 10), None)
_test_softmax((1, 10), 1)
model = helper.make_model(graph, producer_name='Min_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_min():
verify_min((1, 3, 20, 20))
verify_min((20, 20))
model = helper.make_model(graph, producer_name='Max_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_max():
verify_max((1, 3, 20, 20))
verify_max((20, 20))
model = helper.make_model(graph, producer_name='Mean_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_mean():
verify_mean((1, 3, 20, 20))
verify_mean((20, 20))
model = helper.make_model(graph, producer_name='HardSigmoid_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape)
tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_hardsigmoid():
verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6)
verify_hardsigmoid((20, 20), 0.3, 0.4)
model = helper.make_model(graph, producer_name='argmin_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
model = helper.make_model(graph, producer_name='argmax_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_forward_arg_min_max():
'''Verify argmin and argmax'''
verify_argmin([3, 4, 4])
model = helper.make_model(graph, producer_name='fill_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [], target, ctx, out.shape)
tvm.testing.assert_allclose(out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_constantofshape():
verify_constantofshape((2, 3, 4, 5), 10, 'float32')
verify_constantofshape((3, 3), 0, 'int32')
TensorProto.FLOAT, list(outdata.shape))])
model = helper.make_model(graph, producer_name='pad_test')
# tvm result
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, indata, target, ctx, outdata.shape, 'float32', opset=2)
tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
TensorProto.FLOAT, list(outdata.shape))])
model = helper.make_model(graph, producer_name='pad_test')
# tvm result
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, inputs, target, ctx, outdata.shape, 'float32', opset=11)
tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_pad():
verify_pad(np.random.randn(2, 2).astype(
np.float32), [0, 1, 0, 0], 'constant', 0.0)
model = helper.make_model(graph, producer_name='reduce_test')
onnx_out = get_onnxruntime_output(model, data, 'float32')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, data, target, ctx, outshape, 'float32')
tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_all_reduce_funcs():
funcs = ["ReduceMax",
"ReduceMean",
])
model = helper.make_model(graph, producer_name='split_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
output_shape = [o.shape for o in outdatas]
output_type = ['float32', 'float32', 'float32']
tvm_out = get_tvm_output(
tvm.testing.assert_allclose(o, t)
+@tvm.testing.uses_gpu
def test_split():
# 1D
verify_split([1., 2., 3., 4., 5., 6.], [
verify_split([1, 2, 3], [[1], [2], [3]], False)
+@tvm.testing.uses_gpu
def test_binary_ops():
in_shape = (1, 2, 3, 3)
dtype = "float32"
outputs=[helper.make_tensor_value_info("out",
TensorProto.FLOAT, list(out_shape))])
model = helper.make_model(graph, producer_name='_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [x, y], target, ctx)
tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
verify_binary_ops("Equal", x, y, x == y, broadcast=True)
+@tvm.testing.uses_gpu
def test_single_ops():
in_shape = (1, 2, 3, 3)
dtype = "float32"
outputs=[helper.make_tensor_value_info("out",
TensorProto.FLOAT, list(out_shape))])
model = helper.make_model(graph, producer_name='_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [x], target, ctx)
tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol)
verify_single_ops("SoftPlus", x, np.log(1 + np.exp(x)))
+@tvm.testing.uses_gpu
def test_leaky_relu():
def leaky_relu_x(x, alpha):
return np.where(x >= 0, x, x * alpha)
{'alpha': 0.25})
+@tvm.testing.uses_gpu
def test_elu():
def elu_x(x, alpha):
return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
{'alpha': 0.25})
+@tvm.testing.uses_gpu
def test_selu():
def selu_x(x, alpha, gamma):
return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
{'alpha': 0.25, 'gamma': 0.3})
+@tvm.testing.uses_gpu
def test_prelu():
def verify_prelu(x_shape, a_shape):
node = helper.make_node('PRelu',
verify_prelu([2,12,16,16], [1, 12, 1, 1])
+@tvm.testing.uses_gpu
def test_ThresholdedRelu():
def ThresholdedRelu_x(x, alpha):
out_np = np.clip(x, alpha, np.inf)
{'alpha': 0.25})
+@tvm.testing.uses_gpu
def test_ScaledTanh():
def ScaledTanh_x(x, alpha, beta):
return alpha * np.tanh(beta * x)
{'alpha': 0.25, 'beta': 0.3})
+@tvm.testing.uses_gpu
def test_ParametricSoftplus():
def ParametricSoftplus_x(x, alpha, beta):
return alpha * np.log(np.exp(beta * x) + 1)
{'alpha': 0.25, 'beta': 0.3})
+@tvm.testing.uses_gpu
def test_Scale():
def Scale_x(x, scale):
return scale * x
{'scale': 0.25})
+@tvm.testing.uses_gpu
def test_LogSoftmax():
_test_onnx_op_elementwise((1, 4),
tvm.topi.testing.log_softmax_python,
torch.onnx.export(model(), dummy_input, file_name,
export_params=True, verbose=False)
onnx_model = onnx.load(file_name)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
input_data = np.random.uniform(size=input_size).astype('int32')
c2_out = get_onnxruntime_output(onnx_model, input_data)
tvm_out = get_tvm_output(onnx_model, input_data, target, ctx)
tvm.testing.assert_allclose(c2_out, tvm_out)
+@tvm.testing.uses_gpu
def test_resnet():
check_torch_conversion(torchvision.models.resnet18, (1, 3, 224, 224))
# check_torch_conversion(torchvision.models.resnet101, (1,3,224,224))
# check_torch_conversion(torchvision.models.squeezenet1_0, (1,3,224,224))
+@tvm.testing.uses_gpu
def test_densenet():
check_torch_conversion(torchvision.models.densenet161, (1, 3, 224, 224))
+@tvm.testing.uses_gpu
def test_inception():
check_torch_conversion(torchvision.models.inception_v3, (1, 3, 224, 224))
# check_torch_conversion(torchvision.models.shufflenetv2, (1,3,224,224))
+@tvm.testing.uses_gpu
def test_sign():
def Sign_x(x):
return np.sign(x)
model = helper.make_model(graph, producer_name='not_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [x], target, ctx, outdata.shape)
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_not():
# 2d
verify_not(indata=(np.random.randn(3, 4) > 0), dtype=bool)
model = helper.make_model(graph, producer_name='and_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [x, y], target, ctx, outdata.shape)
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_and():
# 2d
x = (np.random.randn(3, 4) > 0)
model = helper.make_model(graph, producer_name='tile_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [indata], target, ctx, outdata.shape, opset=1)
tvm.testing.assert_allclose(outdata, tvm_out)
model = helper.make_model(graph, producer_name='tile_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [indata],
target,
ctx,
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_tile():
x = np.random.rand(2, 3, 4, 5).astype(np.float32)
repeats = np.random.randint(
outputs=[helper.make_tensor_value_info('out', TensorProto.FLOAT, list(outdata.shape))])
model = helper.make_model(graph, producer_name='erf_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [indata], target, ctx, outdata.shape)
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_erf():
x = np.random.rand(2, 3, 4, 6).astype(np.float32)
z = scipy.special.erf(x)
outputs=[helper.make_tensor_value_info('out', dtype, list(outdata.shape))])
model = helper.make_model(graph, producer_name='where_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [condition, x, y], target, ctx, outdata.shape)
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_where():
condition = np.array([[1, 0], [1, 1]], dtype=np.bool)
x = np.array([[1, 2], [3, 4]], dtype=np.int64)
model = helper.make_model(graph, producer_name='or_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(model, [x, y], target, ctx, outdata.shape)
tvm.testing.assert_allclose(outdata, tvm_out)
+@tvm.testing.uses_gpu
def test_or():
# 2d
x = (np.random.randn(3, 4) > 0)
verify_or(indata=[x, y], dtype=bool)
+@tvm.testing.uses_gpu
def test_batch_norm():
def verify_batch_norm(in_shape):
batchnorm = onnx.helper.make_node('BatchNormalization',
model = helper.make_model(graph, producer_name='batchnorm_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=in_shape).astype('float32')
scale = np.random.uniform(size=in_shape[1]).astype('float32')
b = np.random.uniform(size=in_shape[1]).astype('float32')
verify_batch_norm([16, 16, 10, 10])
+@tvm.testing.uses_gpu
def test_batch_norm_dynamic_subgraph():
def verify_batch_norm_dynamic_subgraph(in_shape, o_shape):
batchnorm = onnx.helper.make_node('BatchNormalization',
model = helper.make_model(graph, producer_name='batchnorm_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=in_shape).astype('float32')
inp = np.random.uniform(size=o_shape).astype('float32')
scale = np.random.uniform(size=in_shape[1]).astype('float32')
model = helper.make_model(graph, producer_name='conv_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=x_shape).astype('float32')
W = np.random.uniform(size=w_shape).astype('float32')
tvm_out = get_tvm_output(model, [x, W], target, ctx, y_shape)
tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_conv():
def repeat(N, D):
return tuple([N for _ in range(D)])
model = helper.make_model(graph, producer_name='convtranspose_trest')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=x_shape).astype('float32')
W = np.random.uniform(size=w_shape).astype('float32')
tvm_out = get_tvm_output(model, [x, W], target, ctx, y_shape)
tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_convtranspose():
# Convolution Transpose with padding
# (1, 1, 3, 3) input tensor
verify_convtranspose((1, 1, 3, 3), (1, 2, 3, 3), (1, 2, 7, 3), [1, 2, 1, 2])
+@tvm.testing.uses_gpu
def test_unsqueeze_constant():
from torch.nn import Linear, Sequential, Module
class Flatten(Module):
model = helper.make_model(graph, producer_name='pooling_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
onnx_out = get_onnxruntime_output(model, x_np, 'float32')
tvm_out = get_tvm_output(
model, [x_np], target, ctx, out_shape)
tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_pooling():
for mode in ['max', 'average']:
# Pool1D
onnx_out = get_onnxruntime_output(model, [x_np, y_np], dtype)[0]
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [x_np, y_np], target, ctx, out_shape)
tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_mod():
# Mod
verify_mod(x_shape=[1, 32, 32], y_shape=[1, 1, 32], fmod=0, out_shape=(1, 32, 32), dtype="int32")
onnx_dtype, list(out_shape))])
model = helper.make_model(graph, producer_name='xor_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [x_np, y_np], target, ctx, out_shape)
tvm.testing.assert_allclose(np_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_xor():
# XOR
verify_xor(x_shape=[1, 32, 32], y_shape=[1, 32, 32])
model = helper.make_model(graph, producer_name='pool_test')
onnx_out = get_onnxruntime_output(model, [x_np, rois_np], 'float32')[0]
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
tvm_out = get_tvm_output(
model, [x_np, rois_np], target, ctx, out_shape)
tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_max_roi_pool():
verify_max_roi_pool(x_shape=[1, 3, 6, 6],
rois_shape=[3, 5],
model = helper.make_model(graph, producer_name='lppool_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
onnx_out = get_onnxruntime_output(model, x_np, 'float32')
tvm_out = get_tvm_output(
model, [x_np], target, ctx, out_shape)
tvm.testing.assert_allclose(onnx_out, tvm_out, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_lppool():
# Pool1D
verify_lppool(x_shape=[1, 1, 32], kernel_shape=[3], p=2, strides=[1], pads=[1, 1],
model = helper.make_model(graph, producer_name='rnn_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
onnx_out = get_onnxruntime_output(model, input_values, 'float32')
tvm_out = get_tvm_output(
model,
tvm.testing.assert_allclose(o_out, t_out, rtol=5e-3, atol=5e-3)
+@tvm.testing.uses_gpu
def test_lstm():
# No bias.
verify_rnn(
rnn_type='LSTM')
+@tvm.testing.uses_gpu
def test_gru():
# No bias.
verify_rnn(
rnn_type='GRU')
+@tvm.testing.uses_gpu
def test_resize():
def make_constant_node(name, data_type, dims, vals):
return helper.make_node('Constant',
model = helper.make_model(graph, producer_name='resize_test')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
x = np.random.uniform(size=ishape).astype('float32')
onnx_out = get_onnxruntime_output(model, x, 'float32')
tvm_out = get_tvm_output(model, x, target, ctx, oshape, 'float32', opset=11)
verify([1, 16, 32, 32], [], [1, 1, 0.5, 0.5], "linear", "half_pixel")
+@tvm.testing.uses_gpu
def test_nonzero():
def verify_nonzero(indata, outdata, dtype):
result = np.array((np.nonzero(input_data))) # expected output [[0, 1, 2, 2], [0, 1, 0, 1]]
verify_nonzero(input_data, result, dtype=np.int64)
+@tvm.testing.uses_gpu
def test_topk():
def verify_topk(input_dims, K, axis=-1):
output_dims = list(input_dims)
verify_topk([n, n, n], 5, 2)
+@tvm.testing.uses_gpu
def test_roi_align():
def verify_roi_align(input_dims, num_roi, output_height, output_width, sampling_ratio=0, spatial_scale=1.0):
output_dims = [num_roi, input_dims[1], output_height, output_width]
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.contrib.nvcc import have_fp16
-from tvm.relay.testing.config import ctx_list
+import tvm.testing
sys.setrecursionlimit(10000)
def verify_model(model_name, input_data=[],
custom_convert_map={},
- ctx_list=ctx_list(),
rtol=1e-5, atol=1e-5):
"""Assert that the output of a compiled model matches with that of its
baseline."""
[inp.cpu().numpy() for inp in baseline_input]))
with tvm.transform.PassContext(opt_level=3):
- for target, ctx in ctx_list:
+ for target, ctx in tvm.testing.enabled_targets():
relay_graph, relay_lib, relay_params = relay.build(mod, target=target, params=params)
relay_model = graph_runtime.create(relay_graph, relay_lib, ctx)
relay_model.set_input(**relay_params)
torch.cuda.empty_cache()
# Single operator tests
+@tvm.testing.uses_gpu
def test_forward_add():
torch.set_grad_enabled(False)
input_shape = [10]
verify_model(Add3().float().eval(), input_data=input_data)
verify_model(Add4().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_subtract():
torch.set_grad_enabled(False)
input_shape = [10]
verify_model(Subtract3().float().eval(), input_data=input_data)
verify_model(Subtract4().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_multiply():
torch.set_grad_enabled(False)
input_shape = [10]
verify_model(Multiply4().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_min_max():
class Max(Module):
def forward(self, inp):
verify_model(Min3(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_reciprocal():
torch.set_grad_enabled(False)
input_shape = [2, 1, 10, 1, 10]
input_data = torch.rand(input_shape).float()
verify_model(Reciprocal1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_repeat():
torch.set_grad_enabled(False)
input_shape = [1, 3]
verify_model(Repeat2().float().eval(), input_data=input_data)
verify_model(Repeat3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_repeat_interleave():
torch.set_grad_enabled(False)
input_shape = [2, 2, 3]
verify_model(RepeatInterleave3().float().eval(), input_data=input_data)
verify_model(RepeatInterleave4().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_unsqueeze():
torch.set_grad_enabled(False)
input_shape = [10, 10]
input_data = torch.rand(input_shape).float()
verify_model(Unsqueeze1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_squeeze():
torch.set_grad_enabled(False)
input_shape = [2, 1, 10, 1, 10]
verify_model(Squeeze1().float().eval(), input_data=input_data)
verify_model(Squeeze2().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_arange():
torch.set_grad_enabled(False)
verify_model(Arange11().float().eval())
verify_model(Arange12().float().eval())
+@tvm.testing.uses_gpu
def test_forward_mesh_grid():
torch.set_grad_enabled(False)
verify_model(MeshGrid1().float().eval())
verify_model(MeshGrid2().float().eval())
+@tvm.testing.uses_gpu
def test_forward_abs():
torch.set_grad_enabled(False)
input_shape = [2, 1, 10, 1, 10]
input_data = torch.rand(input_shape).float()
verify_model(Abs1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_concatenate():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(Concatenate1().float().eval(), input_data=input_data)
verify_model(Concatenate2().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_relu():
torch.set_grad_enabled(False)
input_shape = [10, 10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.ReLU().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_prelu():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.PReLU(num_parameters=3).eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_leakyrelu():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(torch.nn.LeakyReLU(negative_slope=1.0, inplace=True).eval(), input_data=input_data)
verify_model(torch.nn.LeakyReLU(negative_slope=1.25, inplace=True).eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_elu():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(torch.nn.ELU(alpha=1.0).eval(), input_data=input_data)
verify_model(torch.nn.ELU(alpha=1.3).eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_celu():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(torch.nn.CELU(alpha=1.0).eval(), input_data=input_data)
verify_model(torch.nn.CELU(alpha=1.3).eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_gelu():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.GELU().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_selu():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.SELU().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_softplus():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(torch.nn.Softplus(beta=1.5, threshold=20).eval(), input_data=input_data)
verify_model(torch.nn.Softplus(beta=5, threshold=10).eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_softsign():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.Softsign().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_log_sigmoid():
torch.set_grad_enabled(False)
input_shape = [10, 10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.LogSigmoid().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_adaptiveavgpool():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(torch.nn.AdaptiveAvgPool2d([1, 1]).eval(), input_data=input_data)
verify_model(torch.nn.AdaptiveAvgPool2d([10, 10]).eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_maxpool2d():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(MaxPool2DWithIndices().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_maxpool1d():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10]
stride=2).eval(),
input_data)
+@tvm.testing.uses_gpu
def test_forward_maxpool3d():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10, 10]
stride=2).eval(),
input_data)
+@tvm.testing.uses_gpu
def test_forward_split():
torch.set_grad_enabled(False)
input_shape = [4, 10]
verify_model(Split([2, 3, 5], 1).float().eval(),
input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_avgpool():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(torch.nn.AvgPool2d(kernel_size=[10, 10]).eval(), input_data=input_data)
verify_model(AvgPool2D2().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_avgpool3d():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10, 10]
verify_model(torch.nn.AvgPool3d(kernel_size=[10, 10, 10]).eval(), input_data=input_data)
verify_model(AvgPool3D1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_hardtanh():
torch.set_grad_enabled(False)
input_shape = [10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.Hardtanh().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_conv():
torch.set_grad_enabled(False)
conv1d_input_shape = [1, 3, 10]
verify_model(Conv1D2().float().eval(), input_data=conv1d_input_data)
verify_model(Conv1D3().float().eval(), input_data=conv1d_input_data)
+@tvm.testing.uses_gpu
def test_forward_conv_transpose():
torch.set_grad_enabled(False)
conv2d_input_shape = [1, 3, 10, 10]
verify_model(torch.nn.ConvTranspose1d(3, 12, 3, bias=False), input_data=conv1d_input_data)
+@tvm.testing.uses_gpu
def test_forward_threshold():
torch.set_grad_enabled(False)
input_shape = [1, 3]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.Threshold(0, 0).float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_contiguous():
torch.set_grad_enabled(False)
input_shape = [10]
verify_model(Contiguous1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_batchnorm():
def init_weight(m):
torch.nn.init.normal_(m.weight, 0, 0.01)
verify_model(bn.eval(), input_data=inp)
+@tvm.testing.uses_gpu
def test_forward_instancenorm():
inp_2d = torch.rand((1, 16, 10, 10))
inp_3d = torch.rand((1, 16, 10, 10, 10))
(torch.nn.InstanceNorm3d(16), inp_3d)]:
verify_model(ins_norm.eval(), input_data=inp)
+@tvm.testing.uses_gpu
def test_forward_layernorm():
def init_weight(m):
torch.nn.init.normal_(m.weight, 0, 0.01)
verify_model(ln.eval(), input_data=inp)
+@tvm.testing.uses_gpu
def test_forward_groupnorm():
input_shape = [10, 6, 5, 5]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.GroupNorm(10, 10).eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_reshape():
torch.set_grad_enabled(False)
input_shape = [2, 1, 10, 1, 10]
verify_model(Reshape2().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_flatten():
class Flatten(Module):
def forward(self, x):
verify_model(BatchFlatten(), input_data=inp)
+@tvm.testing.uses_gpu
def test_forward_transpose():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(Transpose2().float().eval(), input_data=input_data)
verify_model(Transpose3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_size():
torch.set_grad_enabled(False)
input_shape = [1, 3]
verify_model(Size1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_type_as():
torch.set_grad_enabled(False)
input_shape = [1, 3]
verify_model(_create_module(torch.float16), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_view():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(View2().float().eval(), input_data=input_data)
verify_model(View3().float().eval(), input_data=input_data)
-
+@tvm.testing.uses_gpu
def test_forward_select():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(IndexedSelect(x, 1).eval(), input_data=indices)
+@tvm.testing.uses_gpu
def test_forward_clone():
torch.set_grad_enabled(False)
input_shape = [10]
verify_model(Clone1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_gather():
torch.set_grad_enabled(False)
verify_model(Gather3().float().eval(), input_data=[input_data, index])
+@tvm.testing.uses_gpu
def test_forward_logsoftmax():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(LogSoftmax1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_norm():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(Norm10().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_frobenius_norm():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(FroNorm4().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_sigmoid():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
input_data = torch.rand(input_shape).float()
verify_model(torch.nn.Sigmoid().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_dense():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
)
assert not any([op.name == "multiply" for op in list_ops(mod['main'])])
+@tvm.testing.uses_gpu
def test_forward_dropout():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(torch.nn.Dropout3d(p=0.5).eval(), input_data=input_data)
verify_model(torch.nn.AlphaDropout(p=0.5).eval(), input_data=input_data[0, 0])
+@tvm.testing.uses_gpu
def test_forward_slice():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(Slice3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_mean():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
input_data = torch.rand(input_shape).float()
verify_model(Mean1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_expand():
torch.set_grad_enabled(False)
verify_model(Expand2().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_pow():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
input_data = torch.rand(input_shape).float()
verify_model(Pow1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_chunk():
torch.set_grad_enabled(False)
input_shape = [1, 3, 14, 14]
input_data = torch.rand(input_shape).float()
verify_model(Chunk1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_upsample():
class Upsample(Module):
def __init__(self, size=None, scale=None,
verify_model(Upsample(scale=2, mode="bilinear", align_corners=True), inp)
verify_model(Upsample(size=(50, 50), mode="bilinear", align_corners=True), inp)
+@tvm.testing.uses_gpu
def test_to():
""" test for aten::to(...) """
class ToCPU(Module):
verify_model(ToDouble().eval(), torch.tensor(0.8))
+@tvm.testing.uses_gpu
def test_adaptive_pool3d():
for ishape in [(1, 32, 16, 16, 16),
(1, 32, 9, 15, 15),
verify_model(torch.nn.AdaptiveMaxPool3d((7, 8, 9)).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_functional_pad():
torch.set_grad_enabled(False)
pad = (0, 0)
verify_model(Pad1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_zero_pad2d():
inp = torch.rand((1, 1, 3, 3))
verify_model(torch.nn.ZeroPad2d(2).eval(), inp)
verify_model(torch.nn.ZeroPad2d((1, 1, 2, 0)).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_constant_pad1d():
inp = torch.rand((1, 2, 4))
verify_model(torch.nn.ConstantPad2d(2, 3.5).eval(), inp)
verify_model(torch.nn.ConstantPad2d((3, 1), 3.5).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_constant_pad2d():
inp = torch.rand((1, 2, 2, 2))
verify_model(torch.nn.ConstantPad2d(2, 3.5).eval(), inp)
verify_model(torch.nn.ConstantPad2d((3, 0, 2, 1), 3.5).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_constant_pad3d():
inp = torch.rand((1, 3, 2, 2, 2))
verify_model(torch.nn.ConstantPad3d(3, 3.5).eval(), inp)
verify_model(torch.nn.ConstantPad3d((3, 4, 5, 6, 0, 1), 3.5).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_reflection_pad1d():
inp = torch.rand((1, 2, 4))
verify_model(torch.nn.ReflectionPad1d(2).eval(), inp)
verify_model(torch.nn.ReflectionPad1d((2, 3)).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_reflection_pad2d():
inp = torch.rand((1, 1, 3, 3))
verify_model(torch.nn.ReflectionPad2d(2).eval(), inp)
verify_model(torch.nn.ReflectionPad2d((1, 3, 2, 4)).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_replication_pad1d():
inp = torch.rand((1, 2, 4))
verify_model(torch.nn.ReplicationPad1d(2).eval(), inp)
verify_model(torch.nn.ReplicationPad1d((2, 3)).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_replication_pad2d():
inp = torch.rand((1, 1, 3, 3))
verify_model(torch.nn.ReplicationPad2d(2).eval(), inp)
verify_model(torch.nn.ReplicationPad2d((1, 3, 2, 4)).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_replication_pad3d():
inp = torch.rand((1, 1, 3, 3, 3))
verify_model(torch.nn.ReplicationPad3d(3).eval(), inp)
verify_model(torch.nn.ReplicationPad3d((2, 3, 2, 5, 1, 4)).eval(), inp)
+@tvm.testing.uses_gpu
def test_forward_upsample3d():
inp = torch.arange(1, 9, dtype=torch.float32).view(1, 1, 2, 2, 2)
verify_model(torch.nn.Upsample(scale_factor=2, mode='nearest').eval(), inp)
verify_trace_model(NonMaxSupression(iou_thres), [in_boxes, in_scores])
+@tvm.testing.uses_gpu
def test_conv3d():
for ishape in [(1, 32, 16, 16, 16),
(1, 32, 9, 15, 15),
inp)
+@tvm.testing.uses_gpu
def test_conv3d_transpose():
for ishape in [(1, 8, 10, 5, 10),
(1, 8, 5, 8, 8),
# Model tests
+@tvm.testing.uses_gpu
def test_resnet18():
torch.set_grad_enabled(False)
verify_model("resnet18", atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_squeezenet1_0():
torch.set_grad_enabled(False)
verify_model("squeezenet1_0", atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_squeezenet1_1():
torch.set_grad_enabled(False)
verify_model("squeezenet1_1", atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_densenet121():
torch.set_grad_enabled(False)
verify_model("densenet121", atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_inception_v3():
torch.set_grad_enabled(False)
verify_model("inception_v3", atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_googlenet():
torch.set_grad_enabled(False)
verify_model("googlenet", atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_mnasnet0_5():
torch.set_grad_enabled(False)
verify_model("mnasnet0_5", atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_mobilenet_v2():
torch.set_grad_enabled(False)
verify_model("mobilenet_v2", atol=1e-4, rtol=1e-4)
"""
#TODO: Fix VGG and AlexNet issues (probably due to pooling)
+@tvm.testing.uses_gpu
def test_alexnet():
torch.set_grad_enabled(False)
verify_model("alexnet")
+@tvm.testing.uses_gpu
def test_vgg11():
torch.set_grad_enabled(False)
verify_model("vgg11")
+@tvm.testing.uses_gpu
def test_vgg11_bn():
torch.set_grad_enabled(False)
verify_model("vgg11_bn")
"""
+@tvm.testing.uses_gpu
def test_custom_conversion_map():
def get_roi_align():
pool_size = 5
verify_model(model, inputs, custom_map)
+@tvm.testing.uses_gpu
def test_segmentaton_models():
class SegmentationModelWrapper(Module):
def __init__(self, model):
verify_model(SegmentationModelWrapper(deeplab.eval()), inp, atol=1e-4, rtol=1e-4)
+@tvm.testing.uses_gpu
def test_3d_models():
input_shape = (1, 3, 4, 56, 56)
resnet3d = torchvision.models.video.r3d_18(pretrained=True).eval()
rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_control_flow():
class SimpleIf(torch.nn.Module):
def __init__(self, N, M):
verify_script_model(pt_model.eval(), [(10, 20)])
+@tvm.testing.uses_gpu
def test_simple_rnn():
# The mixed tracing and scripting example from
# https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html#mixing-scripting-and-tracing
verify_script_model(RNNLoop().eval(), [(10, 10, 4)])
+@tvm.testing.uses_gpu
def test_forward_reduce_sum():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(ReduceSum5().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_reduce_prod():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(ReduceProd3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_argmin():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(ArgMin3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_argmax():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(ArgMax3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_std():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(Std9().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_variance():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(Variance9().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_rsub():
torch.set_grad_enabled(False)
verify_model(Rsub2().float().eval(), input_data=[d1, d3])
+@tvm.testing.uses_gpu
def test_forward_embedding():
torch.set_grad_enabled(False)
verify_model(torch.nn.Embedding(4, 5, sparse=True).float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_onehot():
torch.set_grad_enabled(False)
verify_model(OneHot2().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_isfinite():
torch.set_grad_enabled(False)
verify_model(IsFinite1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_isnan():
torch.set_grad_enabled(False)
verify_model(IsNan1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_isinf():
torch.set_grad_enabled(False)
verify_model(IsInf1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_clamp():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(Clamp3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_ones():
torch.set_grad_enabled(False)
verify_model(Ones1().float().eval(), input_data=[])
+@tvm.testing.uses_gpu
def test_forward_ones_like():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(OnesLike3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_zeros():
torch.set_grad_enabled(False)
verify_model(Zeros1().float().eval(), input_data=[])
+@tvm.testing.uses_gpu
def test_forward_zeros_like():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(ZerosLike3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_full():
torch.set_grad_enabled(False)
verify_model(Full2().float().eval(), input_data=[])
+@tvm.testing.uses_gpu
def test_forward_full_like():
torch.set_grad_enabled(False)
input_shape = [1, 3, 10, 10]
verify_model(FullLike2().float().eval(), input_data=input_data)
verify_model(FullLike3().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_linspace():
torch.set_grad_enabled(False)
verify_model(Linspace8().float().eval())
+@tvm.testing.uses_gpu
def test_forward_take():
torch.set_grad_enabled(False)
class Take1(Module):
verify_model(Take2().float().eval(), input_data=[input_data, indices])
+@tvm.testing.uses_gpu
def test_forward_topk():
torch.set_grad_enabled(False)
class Topk1(Module):
verify_model(Topk6().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_logical_not():
torch.set_grad_enabled(False)
verify_model(LogicalNot1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_bitwise_not():
torch.set_grad_enabled(False)
verify_model(BitwiseNot1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_bitwise_xor():
torch.set_grad_enabled(False)
verify_model(BitwiseXor2().float().eval(), input_data=[lhs])
+@tvm.testing.uses_gpu
def test_forward_logical_xor():
torch.set_grad_enabled(False)
verify_model(LogicalXor2().float().eval(), input_data=[lhs])
+@tvm.testing.uses_gpu
def test_forward_unary():
torch.set_grad_enabled(False)
verify_model(Neg1().float().eval(), input_data=input_data)
+@tvm.testing.uses_gpu
def test_forward_where():
torch.set_grad_enabled(False)
verify_model(Where2().float().eval(), input_data=[x, y])
+@tvm.testing.uses_gpu
def test_forward_addcdiv():
torch.set_grad_enabled(False)
verify_model(Addcdiv2().float().eval(), input_data=[input_data, t1, t2])
+@tvm.testing.uses_gpu
def test_forward_addcmul():
torch.set_grad_enabled(False)
t2 = torch.rand([1, 3]).float()
verify_model(Addcmul2().float().eval(), input_data=[input_data, t1, t2])
+@tvm.testing.uses_gpu
def test_forward_traced_function():
def fn(t1, t2):
return t1 + t2
tensor2 = torch.randn(3, 4)
verify_model(fn, input_data=[tensor1, tensor2])
+@tvm.testing.uses_gpu
def test_forward_dtypes():
def fn(t1, t2):
return 2.5 * t1 + t2
verify_model(ModuleWithIntParameters(param), input_data=inp)
+@tvm.testing.uses_gpu
def test_weight_names():
tm = torch.jit.trace(torch.nn.Linear(3, 4), [torch.randn(2, 3)])
mod, params = relay.frontend.from_pytorch(tm, [('input', (2, 3))])
assert set(params.keys()) == set(n for n, p in tm.named_parameters())
+@tvm.testing.uses_gpu
def test_duplicate_weight_use():
# The test case doesn't make any sense as a neural network,
# the issue popped up in shared input/output embeddings of bert,
verify_model(Test(), input_data=[torch.randn(5, 5)])
+@tvm.testing.uses_gpu
def test_forward_matmul():
torch.set_grad_enabled(False)
for device in ["llvm"]:
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
continue
mod, params = relay.frontend.from_tensorflow(constant_graph,
from tvm.runtime.vm import VirtualMachine
from packaging import version as package_version
+import tvm.testing
+
#######################################################################
# Generic run functions for TVM & tensorflow
# ------------------------------------------
for device in ["llvm", "cuda"]:
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
continue
if no_gpu and device == 'cuda':
_test_pooling_iteration(input_shape, **kwargs)
+@tvm.testing.uses_gpu
def test_forward_pooling():
""" Pooling """
# TensorFlow only supports NDHWC for max_pool3d on CPU
'Placeholder:0', 'DepthwiseConv2dNative:0')
+@tvm.testing.uses_gpu
def test_forward_convolution():
if is_gpu_available():
_test_convolution('conv', [4, 176, 8, 8], [1, 1, 176, 32], [1, 1], [1, 1], 'SAME', 'NCHW')
compare_tf_with_tvm(np.reshape(data_array, tensor_in_sizes).astype('float32'),
'Placeholder:0', 'Conv3D:0', cuda_layout="NCDHW")
+@tvm.testing.uses_gpu
def test_forward_convolution3d():
if is_gpu_available():
_test_convolution3d('conv', [4, 176, 8, 8, 8], [1, 1, 1, 176, 32], [1, 1, 1], [1, 1, 1], 'SAME', 'NCDHW')
compare_tf_with_tvm(data_array, 'Placeholder:0', 'conv3d_transpose:0', cuda_layout="NDHWC")
+@tvm.testing.uses_gpu
def test_forward_convolution3d_transpose():
if is_gpu_available():
_test_convolution3d_transpose(data_shape=[1, 10, 8, 8, 8],
'Placeholder:0', 'BiasAdd:0')
+@tvm.testing.uses_gpu
def test_forward_biasadd():
if is_gpu_available():
_test_biasadd([4, 176, 8, 8], 'NCHW')
_test_variable(np.random.uniform(size=(32, 100)).astype('float32'))
-def test_read_variable_op():
+@tvm.testing.parametrize_targets("llvm", "cuda")
+def test_read_variable_op(target, ctx):
""" Read Variable op test """
tf.reset_default_graph()
out_node,
)
- for device in ["llvm", "cuda"]:
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- continue
-
- tvm_output = run_tvm_graph(final_graph_def, in_data, in_node,
- target=device, out_names=out_name,
- num_output=len(out_name))
- for i in range(len(tf_output)):
- tvm.testing.assert_allclose(
- tf_output[i], tvm_output[i], atol=1e-4, rtol=1e-5)
+ tvm_output = run_tvm_graph(final_graph_def, in_data, in_node,
+ target=target, out_names=out_name,
+ num_output=len(out_name))
+ for i in range(len(tf_output)):
+ tvm.testing.assert_allclose(
+ tf_output[i], tvm_output[i], atol=1e-4, rtol=1e-5)
sess.close()
# --------
+@tvm.testing.requires_gpu
def test_forward_resnetv2():
'''test resnet model'''
if is_gpu_available():
sess, data, 'input_tensor:0', out_node + ':0')
for device in ["llvm", "cuda"]:
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
continue
tvm_output = run_tvm_graph(graph_def, data, 'input_tensor', len(tf_output),
# TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
for device in ["llvm"]:
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
continue
tvm_output = run_tvm_graph(graph_def, data, in_node, len(out_node),
# TODO(kevinthesun): enable gpu test when VM heterogeneous execution is ready.
for device in ["llvm"]:
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
continue
tvm_output = run_tvm_graph(graph_def, np_data, ["data"], 1,
for device in ["llvm"]:
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
continue
# specific language governing permissions and limitations
# under the License.
import tvm
+import tvm.testing
from tvm import te
import numpy as np
+@tvm.testing.requires_llvm
def test_dot():
nn = 12
n = tvm.runtime.convert(nn)
s = te.create_schedule(C.op)
def verify(target):
- if not tvm.runtime.enabled(target):
- print("Target %s is not enabled" % target)
- return
f = tvm.driver.build(s, [A, B, C], target)
# verify
ctx = tvm.cpu(0)
from tvm.contrib import nvcc
import numpy as np
import time
+import tvm.testing
+@tvm.testing.requires_gpu
def test_exp():
# graph
n = tvm.runtime.convert(1024)
# one line to build the function.
def check_device(device, host="stackvm"):
- if not tvm.runtime.enabled(host):
+ if not tvm.testing.device_enabled(host):
return
ctx = tvm.context(device, 0)
- if not ctx.exist:
- return
fexp = tvm.build(s, [A, B],
device, host,
name="myexp")
check_device("cuda", "llvm")
check_device("vulkan")
+@tvm.testing.requires_gpu
def test_fmod():
# graph
def run(dtype):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
target = tvm.target.create(device)
run("float32")
+@tvm.testing.requires_gpu
def test_multiple_cache_write():
# graph
n = tvm.runtime.convert(1024)
s[C].bind(tx, te.thread_axis("threadIdx.x"))
# one line to build the function.
def check_device(device, host="stackvm"):
- if not tvm.runtime.enabled(host):
+ if not tvm.testing.device_enabled(host):
return
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
return
func = tvm.build(s, [A0, A1, C],
device, host,
# create iter var and assign them tags.
bx, tx = s[B].split(B.op.axis[0], factor=32)
# one line to build the function.
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
flog = tvm.build(s, [A, B],
b.asnumpy(), np.power(np.log(a.asnumpy()), 2.0), rtol=1e-5)
+@tvm.testing.uses_gpu
def test_popcount():
def run(dtype):
# graph
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
target = tvm.target.create(device)
run('uint64')
+@tvm.testing.requires_gpu
def test_add():
def run(dtype):
# graph
# one line to build the function.
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
fadd = tvm.build(s, [A, B, C],
run("uint64")
+@tvm.testing.requires_gpu
def try_warp_memory():
"""skip this in default test because it require higher arch"""
m = 128
# one line to build the function.
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
f = tvm.build(s, [A, B], device)
# one line to build the function.
def check_device(device, host="llvm"):
- if not tvm.runtime.enabled(host):
+ if not tvm.testing.device_enabled(device):
return
ctx = tvm.context(device, 0)
- if not ctx.exist:
- return
fexp = tvm.build(s, [A, B],
device, host,
name="myexp")
# one line to build the function.
def check_device(device, host="llvm"):
- if not tvm.runtime.enabled(host):
+ if not tvm.testing.device_enabled(device):
return
ctx = tvm.context(device, 0)
- if not ctx.exist:
- return
fadd = tvm.build(s, [A, B, C, D],
device, host,
name="myadd")
from tvm import te
import numpy as np
import time
+import tvm.testing
+@tvm.testing.requires_gpu
def test_gemm():
# graph
nn = 1024
# one line to build the function.
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
import tvm
from tvm import te
import numpy as np
+import tvm.testing
+@tvm.testing.requires_gpu
def test_reduce_prims():
def test_prim(reducer, np_reducer):
# graph
# one line to build the function.
def check_device(device, host="llvm"):
ctx = tvm.context(device, 0)
- if not tvm.runtime.enabled(host):
- return
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
freduce = tvm.build(s,
s[BF].parallel(BF.op.axis[0])
# one line to build the function.
def check_target(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
return
ctx = tvm.cpu(0)
fapi = tvm.lower(s, args=[A, B])
s[BF].parallel(BF.op.axis[0])
# one line to build the function.
def check_target(target="llvm"):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
return
ctx = tvm.cpu(0)
fapi = tvm.lower(s, args=[A, B])
check_target()
+@tvm.testing.requires_gpu
def test_rfactor_threads():
nn = 1027
mm = 10
# one line to build the function.
def check_target(device, host="stackvm"):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
check_target("opencl")
check_target("rocm")
+@tvm.testing.requires_gpu
def test_rfactor_elemwise_threads():
n = 1025
m = 10
# one line to build the function.
def check_target(device, host="stackvm"):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
fapi = tvm.lower(s, args=[A, C])
def check_target():
device = 'cpu'
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
ctx = tvm.context(device, 0)
check_target()
+@tvm.testing.requires_gpu
def test_rfactor_argmax():
def fcombine(x, y):
lhs = tvm.tir.Select((x[1] >= y[1]), x[0], y[0])
def check_target(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
fapi = tvm.lower(s, args=[A0, A1, B0, B1])
check_target("vulkan")
check_target("rocm")
+@tvm.testing.requires_gpu
def test_warp_reduction1():
nthx = 32
nthy = 4
def check_target(device, m, n):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
# This is a bug in normal reduction.
# check_target("cuda", m=10, n=37)
+@tvm.testing.requires_gpu
def test_warp_reduction2():
def fcombine(x, y):
return x[0] + y[0], x[1] * y[1]
def check_target(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
import tvm
from tvm import te
import numpy as np
+import tvm.testing
+@tvm.testing.requires_gpu
def test_scan():
m = te.size_var("m")
n = te.size_var("n")
# one line to build the function.
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled.." % device)
return
fscan = tvm.build(s, [X, res],
from tvm import autotvm
from tvm.autotvm.tuner import RandomTuner
+import tvm.testing
+
@autotvm.template("testing/conv2d_no_batching")
def conv2d_no_batching(N, H, W, CI, CO, KH, KW):
"""An example template for testing"""
target=target, target_host=target_host)
return task, target
-def test_tuning():
- def check(target, target_host):
- ctx = tvm.context(target, 0)
- if not ctx.exist:
- logging.info("Skip test because %s is not available" % target)
- return
-
- # init task
- task, target = get_sample_task(target, target_host)
- logging.info("%s", task.config_space)
-
- measure_option = autotvm.measure_option(
- autotvm.LocalBuilder(),
- autotvm.LocalRunner())
+@tvm.testing.parametrize_targets("cuda", "opencl")
+def test_tuning(target, ctx):
+ """Run a 20-trial RandomTuner session on the sample task for `target` (`ctx` is accepted but not used here)."""
+ # init task
+ task, target = get_sample_task(target, None)
+ logging.info("%s", task.config_space)
- tuner = RandomTuner(task)
- tuner.tune(n_trial=20, measure_option=measure_option)
+ measure_option = autotvm.measure_option(
+ autotvm.LocalBuilder(),
+ autotvm.LocalRunner())
- check("cuda", None)
- check("opencl", None)
+ tuner = RandomTuner(task)
+ tuner.tune(n_trial=20, measure_option=measure_option)
if __name__ == "__main__":
# only print log when invoked from main
import tvm.topi.testing
from tvm.topi.util import get_const_tuple
from pytest import skip
+import tvm.testing
def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, add_bias=False, add_relu=False,
def check_device(device):
+ """Declare the NCHW conv2d under target `device`; do nothing when that target is not enabled."""
ctx = tvm.context(device, 0)
- if not ctx.exist:
- skip("s is not enabled" % device)
+ if not tvm.testing.device_enabled(device):
+ # Stop here: the skip(...) branch being replaced never fell through,
+ # so a bare print would go on to build and run on a disabled target.
+ print("Skipping %s because it is not enabled" % device)
+ return
print("Running on target: %s" % device)
with tvm.target.create(device):
C = topi.nn.conv2d(A, W, stride, padding, dilation, layout='NCHW', out_dtype=dtype)
from mxnet import gluon
import logging
import os
+import tvm.testing
logging.basicConfig(level=logging.INFO)
logging.info('[final] validation: acc-top1=%f acc-top5=%f', top1, top5)
return top1
+@tvm.testing.requires_gpu
def test_quantize_acc(cfg, rec_val):
qconfig = qtz.qconfig(skip_conv_layers=[0],
nbit_input=cfg.nbit_input,
import numpy as np
import tvm
from tvm import relay
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
import tvm.topi.testing
import random
+import tvm.testing
+@tvm.testing.uses_gpu
def test_dyn_broadcast_to():
dtype = 'uint8'
rank = 3
x = np.random.uniform(size=x_shape).astype(dtype)
dyn_shape = (1, ) * rank
ref_res = np.broadcast_to(x, dyn_shape)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if (target != 'cuda'): #skip cuda because we don't have dynamic support for GPU
for kind in ["vm", "debug"]:
mod = tvm.ir.IRModule.from_expr(func)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_dyn_one_hot():
def _get_oshape(indices_shape, depth, axis):
oshape = []
func = relay.Function([indices, depth_var], out)
indices_np = np.random.randint(0, depth, size=indices_shape).astype("int32")
out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if (target != 'cuda'): #skip cuda because we don't have dynamic support for GPU
for kind in ["vm", "debug"]:
mod = tvm.ir.IRModule.from_expr(func)
import tvm
from tvm import relay
from tvm import te
-from tvm.relay.testing import ctx_list
+from tvm.relay.testing import enabled_targets
import random
from test_dynamic_op_level3 import verify_func
import tvm.topi.testing
zz = run_infer_type(z)
func = relay.Function([x, scale_h_var, scale_w_var], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if "llvm" not in target: continue
for kind in ["vm", "debug"]:
mod = tvm.ir.IRModule.from_expr(func)
from tvm import te
from tvm import relay
from tvm.relay import create_executor, transform
-from tvm.relay.testing import ctx_list, check_grad, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
+import tvm.testing
def verify_func(func, data, ref_res):
assert isinstance(data, list)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
#TODO(mbrookhart): enable CUDA tests once the VM supports dynamic shapes
if "llvm" not in target: continue
for kind in ["vm", "debug"]:
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
relay.backend.compile_engine.get().clear()
+@tvm.testing.uses_gpu
def test_dyn_reshape():
def verify_reshape(shape, newshape, oshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
verify_reshape((2, 3, 4, 5), (-3, -3), (6, 20))
verify_reshape((2, 3, 4), (0, -3), (2, 12))
+@tvm.testing.uses_gpu
def test_dyn_shape_reshape():
def verify_reshape(shape, newshape, oshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
verify_reshape((2, 3, 4), (8, 3), (8, 3))
verify_reshape((4, 7), (2, 7, 2), (2, 7, 2))
+@tvm.testing.uses_gpu
def test_dyn_tile():
def verify_tile(dshape, reps):
x = relay.var("x", relay.TensorType(dshape, "float32"))
verify_tile((2, 3), (3, 2, 1))
+@tvm.testing.uses_gpu
def test_dyn_zeros_ones():
def verify_zeros_ones(shape, dtype):
for op, ref in [(relay.zeros, np.zeros), (relay.ones, np.ones)]:
verify_zeros_ones((1, 3), 'int64')
verify_zeros_ones((8, 9, 1, 2), 'float32')
+@tvm.testing.uses_gpu
def test_dyn_full():
def verify_full(fill_value, src_shape, dtype):
x = relay.var("x", relay.scalar_type(dtype))
from tvm import te
from tvm import relay
from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
import tvm.topi.testing
+import tvm.testing
def test_resize_infer_type():
assert zz.checked_type == relay.TensorType((n, c, relay.Any(), relay.Any()), "int8")
+@tvm.testing.uses_gpu
def test_resize():
def verify_resize(dshape, scale, method, layout):
if layout == "NHWC":
zz = run_infer_type(z)
func = relay.Function([x, size_var], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if "llvm" not in target: continue
for kind in ["vm", "debug"]:
mod = tvm.ir.IRModule.from_expr(func)
import tvm
from tvm import te
from tvm import relay
-from tvm.relay.testing import ctx_list
+import tvm.testing
+@tvm.testing.uses_gpu
def test_dynamic_topk():
def verify_topk(k, axis, ret_type, is_ascend, dtype):
shape = (20, 100)
np_values[i, :] = np_data[i, np_indices[i, :]]
np_indices = np_indices.astype(dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if "llvm" not in target: continue
for kind in ["vm", "debug"]:
mod = tvm.ir.IRModule.from_expr(func)
from tvm import topi
from tvm.relay.testing import run_infer_type
from tvm.relay.testing.temp_op_attr import TempOpAttr
+import tvm.testing
@autotvm.register_topi_compute("test/conv2d_1")
z3 = engine.lower(get_func(()), "llvm")
assert z1.same_as(z2)
assert not z3.same_as(z1)
- if tvm.context("cuda").exist:
+ if tvm.testing.device_enabled("cuda"):
z4 = engine.lower(get_func(()), "cuda")
assert not z3.same_as(z4)
# Test JIT target
for target in ["llvm"]:
ctx = tvm.context(target)
- if ctx.exist:
+ if tvm.testing.device_enabled(target):
f = engine.jit(get_func((10,)), target)
x = tvm.nd.array(np.ones(10).astype("float32"), ctx=ctx)
y = tvm.nd.empty((10,), ctx=ctx)
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay.op import add
-from tvm.relay.testing.config import ctx_list
+import tvm.testing
# @tq, @jr should we put this in testing ns?
def check_rts(expr, args, expected_result, mod=None):
assert len(device_types) == 1
+@tvm.testing.uses_gpu
def test_gru_like():
def unit(rnn_dim):
X = relay.var("X", shape=(1, rnn_dim))
out_shape = (1, rnn_dim)
z = unit(rnn_dim)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
with tvm.transform.PassContext(opt_level=2):
graph, lib, params = relay.build(tvm.IRModule.from_expr(z), target)
m = graph_runtime.create(graph, lib, ctx)
# TODO(tqchen) add more types once the schedule register is fixed.
for target in ["llvm"]:
ctx = tvm.context(target, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(target):
return
intrp = create_executor(mod=mod, ctx=ctx, target=target)
result = intrp.evaluate(expr)(*args)
from tvm import te
from tvm import relay
from tvm.contrib.nvcc import have_fp16
+import tvm.testing
def test_basic_build():
atol=1e-5, rtol=1e-5)
+@tvm.testing.requires_cuda
def test_fp16_build():
dtype = "float16"
- if not tvm.runtime.enabled("cuda") or not tvm.gpu(0).exist:
- print("skip because cuda is not enabled.")
- return
-
ctx = tvm.gpu(0)
if dtype == "float16" and not have_fp16(ctx.compute_version):
print("skip because gpu does not support fp16")
atol=1e-5, rtol=1e-5)
-def test_fp16_conversion():
- def check_conversion(tgt, ctx):
- if not tvm.runtime.enabled(tgt):
- print("skip because {} is not enabled.".format(tgt))
- return
- elif tgt == "cuda" and ctx.exist and not have_fp16(ctx.compute_version):
- print("skip because gpu does not support fp16")
- return
-
- n = 10
+@tvm.testing.parametrize_targets("llvm", "cuda")
+def test_fp16_conversion(target, ctx):
+ """Build float32->float16 and float16->float32 casts for `target`, run them on `ctx`, and compare with NumPy's astype."""
+ if target == "cuda" and not have_fp16(ctx.compute_version):
+ print("skip because gpu does not support fp16")
+ return
- for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]:
- x = relay.var("x", relay.TensorType((n,), src))
- y = x.astype(dst)
- func = relay.Function([x], y)
+ n = 10
- # init input
- X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)
+ for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]:
+ x = relay.var("x", relay.TensorType((n,), src))
+ y = x.astype(dst)
+ func = relay.Function([x], y)
- # build
- with tvm.transform.PassContext(opt_level=1):
- g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), tgt)
+ # init input
+ X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)
- # test
- rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
- rt.set_input("x", X)
- rt.run()
- out = rt.get_output(0)
+ # build
+ with tvm.transform.PassContext(opt_level=1):
+ g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), target)
- np.testing.assert_allclose(out.asnumpy(), X.asnumpy().astype(dst),
- atol=1e-5, rtol=1e-5)
+ # test
+ rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
+ rt.set_input("x", X)
+ rt.run()
+ out = rt.get_output(0)
- for target, ctx in [('llvm', tvm.cpu()), ('cuda', tvm.gpu())]:
- check_conversion(target, ctx)
+ np.testing.assert_allclose(out.asnumpy(), X.asnumpy().astype(dst),
+ atol=1e-5, rtol=1e-5)
if __name__ == "__main__":
import tvm
from tvm import te
from tvm import relay
-from tvm.relay.testing import check_grad, ctx_list, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
from tvm.relay.transform import gradient
+import tvm.testing
def sigmoid(x):
return x_copy
+@tvm.testing.uses_gpu
def test_unary_op():
def check_single_op(opfunc, ref, dtype):
shape = (10, 4)
fwd_func = run_infer_type(fwd_func)
bwd_func = run_infer_type(gradient(fwd_func))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor(ctx=ctx, target=target)
op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
check_single_op(opfunc, ref, dtype)
+@tvm.testing.uses_gpu
def test_binary_op():
def inst(vars, sh):
return [vars.get(s, s) for s in sh]
fwd_func = run_infer_type(fwd_func)
bwd_func = run_infer_type(gradient(fwd_func))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor(ctx=ctx, target=target)
op_res, (op_grad0, op_grad1) = intrp.evaluate(bwd_func)(x_data, y_data)
np.testing.assert_allclose(op_grad0.asnumpy(), ref_grad0, rtol=0.01)
import tvm
from tvm import te
from tvm import relay
-from tvm.relay.testing import check_grad, ctx_list, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
from tvm.relay.transform import gradient
+import tvm.testing
def verify_max_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode):
padding=[ph, pw, ph, pw],
pool_type='max', ceil_mode=ceil_mode)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor(ctx=ctx, target=target)
op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
+@tvm.testing.uses_gpu
def test_max_pool2d_grad():
verify_max_pool2d_grad((1, 4, 16, 16), pool_size=(2, 2), strides=(2, 2), padding=(0, 0), ceil_mode=False)
verify_max_pool2d_grad((1, 4, 16, 16), pool_size=(1, 1), strides=(1, 1), padding=(1, 1), ceil_mode=False)
padding=[ph, pw, ph, pw],
pool_type='avg', ceil_mode=ceil_mode)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor(ctx=ctx, target=target)
op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
+@tvm.testing.uses_gpu
def test_avg_pool2d_grad():
verify_avg_pool2d_grad((1, 4, 16, 16), pool_size=(2, 2), strides=(2, 2), padding=(0, 0),
ceil_mode=False, count_include_pad=True)
strides=(1, 1), padding=[0, 0, 0, 0], pool_type='avg',
ceil_mode=False)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor(ctx=ctx, target=target)
op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
+@tvm.testing.uses_gpu
def test_global_avg_pool2d_grad():
verify_global_avg_pool2d_grad((1, 4, 16, 16))
verify_global_avg_pool2d_grad((1, 8, 8, 24))
.detach().numpy()
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
data = tvm.nd.array(data_pt.detach().numpy(), ctx)
weight = tvm.nd.array(weight_pt.detach().numpy(), ctx)
intrp = relay.create_executor(ctx=ctx, target=target)
np.testing.assert_allclose(grad_weight.asnumpy(), grad_weight_pt, rtol=1e-4, atol=1e-4)
+@tvm.testing.uses_gpu
def test_conv2d_grad():
verify_conv2d_grad((1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1])
verify_conv2d_grad((1, 4, 16, 16), (16, 4, 1, 1), [1, 1], [0, 0], [1, 1])
import tvm
from tvm import te
from tvm import relay
-from tvm.relay.testing import check_grad, ctx_list, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
from tvm.relay.transform import gradient
+import tvm.testing
+@tvm.testing.uses_gpu
def test_clip():
for dtype in ('float32', 'float64'):
ref = (lambda x: np.where(x > 10.0, np.zeros_like(x),
fwd_func = run_infer_type(fwd_func)
bwd_func = run_infer_type(gradient(fwd_func))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor(ctx=ctx, target=target)
op_res, (op_grad, ) = intrp.evaluate(bwd_func)(data)
np.testing.assert_allclose(op_grad.asnumpy(), ref_grad, rtol=0.01)
import scipy
from tvm import relay
from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
import tvm.topi.testing
from tvm.contrib.nvcc import have_fp16
+import tvm.testing
def sigmoid(x):
one = np.ones_like(x)
return one / np.sqrt(x)
+@tvm.testing.uses_gpu
def test_unary_op():
def check_single_op(opfunc, ref, dtype):
shape = (10, 4)
data = np.random.rand(*shape).astype(dtype)
ref_res = ref(data)
func = relay.Function([x], y)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
# use the graph executor by default for testing, as we need to
# create the function explicitly to avoid constant-folding.
if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
check_single_op(opfunc, ref, dtype)
+@tvm.testing.uses_gpu
def test_binary_op():
def inst(vars, sh):
return [vars.get(s, s) for s in sh]
ref_res = ref(x_data, y_data)
func = relay.Function([x, y], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
# use the graph executor by default for testing, as we need to
# create the function explicitly to avoid constant-folding.
if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
check_binary_op(opfunc, ref, dtype)
+@tvm.testing.uses_gpu
def test_expand_dims():
# based on topi test
def verify_expand_dims(dshape, dtype, oshape, axis, num_newaxis):
x = relay.Var("x", relay.TensorType(dshape, dtype))
func = relay.Function([x], relay.expand_dims(x, axis, num_newaxis))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
continue
data = np.random.uniform(size=dshape).astype(dtype)
verify_expand_dims((3, 10), dtype, (1, 3, 10), -3, 1)
+@tvm.testing.uses_gpu
def test_bias_add():
for dtype in ['float16', 'float32']:
xshape=(10, 2, 3, 4)
x_data = np.random.uniform(size=xshape).astype(dtype)
y_data = np.random.uniform(size=bshape).astype(dtype)
ref_res = x_data + y_data.reshape((2, 1, 1))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
continue
intrp = relay.create_executor("graph", ctx=ctx, target=target)
assert yy.checked_type == relay.TensorType((n, t, 1, 100), dtype)
+@tvm.testing.uses_gpu
def test_softmax():
for dtype in ['float16', 'float32']:
# Softmax accuracy for float16 is poor
func = relay.Function([x], y)
x_data = np.random.uniform(size=shape).astype(dtype)
ref_res = tvm.topi.testing.softmax_python(x_data)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_log_softmax():
for dtype in ['float16', 'float32']:
# Softmax accuracy for float16 is poor
func = relay.Function([x], y)
x_data = np.random.uniform(size=shape).astype(dtype)
ref_res = tvm.topi.testing.log_softmax_python(x_data)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_concatenate():
for dtype in ['float16', 'float32']:
n, t, d = te.size_var("n"), te.size_var("t"), 100
t_data = np.random.uniform(size=()).astype(dtype)
ref_res = np.concatenate((x_data, y_data), axis=1) + t_data
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if dtype == 'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
continue
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
y = relay.nn.dense(x, w)
yy = run_infer_type(y)
+@tvm.testing.uses_gpu
def test_dense():
for dtype in ['float16', 'float32']:
# Dense accuracy for float16 is poor
w_data = np.random.rand(2, 5).astype(dtype)
ref_res = np.dot(x_data, w_data.T)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data, w_data)
import tvm.topi.testing
from tvm import relay
from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
from tvm import topi
import tvm.topi.testing
+import tvm.testing
+@tvm.testing.uses_gpu
def test_checkpoint():
dtype = "float32"
xs = [relay.var("x{}".format(i), dtype) for i in range(4)]
assert f.checked_type == f_checkpoint.checked_type
inputs = [np.random.uniform() for _ in range(len(xs))]
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
f_res = intrp.evaluate(f)(*inputs)
tvm.ir.assert_structural_equal(df, df_parsed)
+@tvm.testing.uses_gpu
def test_collapse_sum_like():
shape = (3, 4, 5, 6)
shape_like = (4, 5, 6)
x = np.random.uniform(size=shape).astype(dtype)
y = np.random.uniform(size=shape_like).astype(dtype)
ref_res = np.sum(x, 0)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x, y)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_collapse_sum_to():
shape = (3, 4, 5, 6)
shape_to = (4, 5, 6)
func = relay.Function([x], z)
x = np.random.uniform(size=shape).astype(dtype)
ref_res = np.sum(x, 0)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_broadcast_to():
shape = (4, 1, 6)
shape_like = (3, 4, 5, 6)
func = relay.Function([x], z)
x = np.random.uniform(size=shape).astype(dtype)
ref_res = np.broadcast_to(x, shape_like)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_broadcast_to_like():
shape = (4, 1, 6)
shape_like = (3, 4, 5, 6)
y = np.random.uniform(size=shape_like).astype(dtype)
ref_res = np.broadcast_to(x, shape_like)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x, y)
y_data = np.random.uniform(size=slice_like).astype(dtype)
ref_res = np_slice_like(x_data, y_data, axes)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_slice_like():
d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4")
verify_slice_like(data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3))
axes=(2, 3),
output=(1, 3, 112, 112))
+@tvm.testing.uses_gpu
def test_reverse_reshape():
def verify_reverse_reshape(shape, newshape, oshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
func = relay.Function([x], z)
x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
ref_res = np.reshape(x_data, oshape)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
y_np = np.random.uniform(size=y_shape).astype(dtype)
z_np = tvm.topi.testing.batch_matmul(x_np, y_np)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
z = intrp.evaluate(func)(x_np, y_np)
tvm.testing.assert_allclose(z.asnumpy(), z_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_batch_matmul():
b, m, n, k = te.size_var("b"), te.size_var("m"), te.size_var("n"), te.size_var("k")
x = relay.var("x", relay.TensorType((b, m, k), "float32"))
verify_batch_matmul((5, 16, 32), (5, 20, 32), (5, 16, 20))
verify_batch_matmul((30, 16, 32), (30, 20, 32), (30, 16, 20))
+@tvm.testing.uses_gpu
def test_shape_of():
shape = (10, 5, 12)
x = relay.var("x", shape=shape)
func = relay.Function([x], relay.op.shape_of(x))
func = run_infer_type(func)
x_data = np.random.rand(*shape).astype('float32')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
# With the graph executor, this op is optimized out after the constant
# folding pass, so here we only test with the interpreter.
for kind in ["debug"]:
tvm.testing.assert_allclose(op_res.asnumpy(),
np.array(shape).astype('int32'))
+@tvm.testing.uses_gpu
def test_ndarray_size():
def verify_ndarray_size(shape):
x = relay.var("x", shape=shape)
x_data = np.random.uniform(size=shape).astype("float32")
ref_res = np.size(x_data)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
np_data = np.random.uniform(low=0, high=255, size=dshape).astype(dtype)
np_out = tvm.topi.testing.adaptive_pool(np_data, out_size, pool_type, layout)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
relay_out = intrp1.evaluate(func)(np_data)
tvm.testing.assert_allclose(relay_out.asnumpy(), np_out, rtol=1e-5, atol=1e-5)
verify_adaptive_pool(dshape, out_size, pool_type, layout, dtype, opfunc)
+@tvm.testing.uses_gpu
def test_adaptive_pool():
verify_adaptive_pool2d((1, 9, 224, 224), (1, 1), "max")
verify_adaptive_pool2d((1, 3, 224, 224), (2, 3), "avg")
verify_adaptive_pool3d((1, 16, 32, 32, 32), (2, 4, 4), "max", layout="NDHWC")
+@tvm.testing.uses_gpu
def test_sequence_mask():
def _verify(data_shape, mask_value, axis, dtype, itype):
max_length = data_shape[axis]
valid_length_np = np.random.randint(0, max_length, size=nbatch).astype(itype)
gt_out_np = tvm.topi.testing.sequence_mask(data_np, valid_length_np, mask_value, axis)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
out_relay = intrp.evaluate(func)(data_np, valid_length_np)
_verify((2, 3, 5, 3), 0.0, 0, 'float32', 'int64')
_verify((5, 8, 3), 0.1, 1, 'float64', 'float32')
+@tvm.testing.uses_gpu
def test_one_hot():
def _get_oshape(indices_shape, depth, axis):
oshape = []
indices_np = np.random.randint(0, depth, size=indices_shape).astype("int32")
out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
out_relay = intrp.evaluate(func)(indices_np)
_verify((3, 2, 4, 5), 6, 1, 0, 1, "int32")
_verify((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
+@tvm.testing.uses_gpu
def test_matrix_set_diag():
def _verify(input_shape, dtype):
diagonal_shape = list(input_shape[:-2])
diagonal_np = np.random.randint(-100, 100, size=diagonal_shape).astype(dtype)
out_np = tvm.topi.testing.matrix_set_diag(input_np, diagonal_np)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
out_relay = intrp.evaluate(func)(input_np, diagonal_np)
from tvm import autotvm
from tvm import relay
from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
from tvm.contrib import util
import tvm.topi.testing
from tvm.topi.cuda.conv3d_winograd import _infer_tile_size
+import tvm.testing
+@tvm.testing.uses_gpu
def test_conv1d_infer_type():
# symbolic in batch dimension
n, c, w = te.var("n"), 10, 224
(n, w, 16), "int32")
+@tvm.testing.uses_gpu
def test_conv1d_run():
def run_test_conv1d(dtype, out_dtype, scale, dshape, kshape,
padding=(1, 1),
ref_res = tvm.topi.testing.conv1d_ncw_python(
data.astype(out_dtype), kernel.astype(out_dtype), 1, padding, dilation)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if target in except_targets:
continue
+ ctx = tvm.context(target, 0)
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
padding=(1, 1), channels=10, kernel_size=3, dilation=3)
+@tvm.testing.uses_gpu
def test_conv2d_infer_type():
# symbolic in batch dimension
n, c, h, w = te.size_var("n"), 10, 224, 224
(n, h, w, 16), "int32")
+@tvm.testing.uses_gpu
def test_conv2d_run():
def run_test_conv2d(dtype, out_dtype, scale, dshape, kshape,
padding=(1, 1),
ref_res = fref(data.astype(out_dtype), dkernel.astype(out_dtype))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if target in except_targets:
continue
+ ctx = tvm.context(target, 0)
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-4, atol=1e-4)
run_test_conv2d("float32", "float32", 1, dshape, kshape,
padding=(1, 1), channels=10, kernel_size=(3 ,3), dilation=(3, 3))
+@tvm.testing.uses_gpu
def test_conv2d_winograd():
class WinogradFallback(autotvm.FallbackContext):
def _query_inside(self, target, workload):
groups=groups)
with WinogradFallback(), tvm.transform.PassContext(opt_level=3):
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if target != 'cuda':
continue
+ ctx = tvm.context(target, 0)
params = {'w': tvm.nd.array(kernel)}
graph, lib, params = relay.build_module.build(mod, target=target, params=params)
module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
padding=(2, 2), channels=192, kernel_size=(7, 7))
+@tvm.testing.uses_gpu
def test_conv3d_infer_type():
# symbolic in batch dimension
n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224
(n, d, h, w, 16), "int32")
+@tvm.testing.uses_gpu
def test_conv3d_run():
def run_test_conv3d(dtype, out_dtype, scale, dshape, kshape,
padding=(1, 1, 1),
ref_res = fref(data.astype(out_dtype), dkernel.astype(out_dtype))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if target in except_targets:
continue
+ ctx = tvm.context(target, 0)
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
run_test_conv3d("float32", "float32", 1, dshape, kshape,
padding=(1, 1, 1), channels=10, kernel_size=(3, 3 ,3))
+@tvm.testing.uses_gpu
def test_conv3d_ndhwc_run():
def run_test_conv3d(dtype, out_dtype, scale, dshape, kshape,
padding=(1, 1, 1),
ref_res = fref(data.astype(out_dtype), dkernel.astype(out_dtype))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if target in except_targets:
continue
+ ctx = tvm.context(target, 0)
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
run_test_conv3d("float32", "float32", 1, dshape, kshape,
padding=(1, 1, 1), channels=10, kernel_size=(3, 3 ,3), except_targets=["cuda"])
+@tvm.testing.uses_gpu
def test_conv3d_winograd():
class WinogradFallback(autotvm.FallbackContext):
def _query_inside(self, target, workload):
groups=groups)
with WinogradFallback(), tvm.transform.PassContext(opt_level=3):
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if target != 'cuda':
continue
+ ctx = tvm.context(target, 0)
params = {'w': tvm.nd.array(kernel)}
graph, lib, params = relay.build_module.build(mod, target=target, params=params)
module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
padding=(0, 2, 2), channels=120, kernel_size=(1, 5, 5))
+@tvm.testing.uses_gpu
def test_conv3d_transpose_infer_type():
# symbolic in batch dimension
n, c, d, h, w = te.size_var("n"), 10, 224, 224, 224
(n, 12, 226, 226, 226), "int32")
+@tvm.testing.uses_gpu
def test_conv3d_transpose_ncdhw_run():
dshape = (1, 3, 24, 24, 24)
kshape = (3, 4, 2, 2, 2)
kernel = np.random.uniform(size=kshape).astype(dtype)
ref_res = tvm.topi.testing.conv3d_transpose_ncdhw_python(data, kernel, 1, 1, 0)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_conv2d_transpose_infer_type():
# symbolic in batch dimension
n, c, h, w = te.size_var("n"), 10, 10, 12
(n, 15, 15, 11), "float32")
+@tvm.testing.uses_gpu
def test_conv2d_transpose_nchw_run():
dshape = (1, 3, 18, 18)
kshape = (3, 10, 3, 3)
ref_res = tvm.topi.testing.conv2d_transpose_nchw_python(
data, kernel, 2, 1, (1, 1))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_conv2d_transpose_nhwc_run():
dshape_nhwc = (1, 18, 18, 3)
kshape_hwoi = (3, 3, 10, 3)
ref_res = tvm.topi.testing.conv2d_transpose_nhwc_python(data, kernel, 'HWOI',
2, 1, output_padding=(1, 1))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_conv1d_transpose_ncw_run():
dshape = (1, 3, 18)
kshape = (3, 10, 3)
ref_res = tvm.topi.testing.conv1d_transpose_ncw_python(
data, kernel, 2, 1, output_padding=(1,))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, kernel)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_upsampling_infer_type():
n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
scale = tvm.tir.const(2.0, "float64")
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((n, c, 200, 400), "float32")
+@tvm.testing.uses_gpu
def test_upsampling3d_infer_type():
n, c, d, h, w = te.size_var("n"), te.size_var("c"),\
te.size_var("d"), te.size_var("h"), te.size_var("w")
func = relay.Function([x], y)
data = np.random.uniform(size=dshape).astype(dtype)
ref_res = reffunc(data.reshape(1, 3, 14, 2, 14, 2), axis=(3, 5))
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
func = relay.Function([x], y)
data = np.random.randint(low=-128, high=128, size=dshape)
ref_res = reffunc(data.reshape(1,3,14,2,14,2), axis=(3,5)).astype(dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
func = relay.Function([x], y)
data = np.random.uniform(size=dshape).astype(dtype)
ref_res = reffunc(data, axis=(2,3), keepdims=True)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_pool2d():
_test_pool2d(relay.nn.max_pool2d, np.max)
_test_pool2d(relay.nn.max_pool2d, np.max, pool_size=2, strides=2, padding=0)
_test_global_pool2d(relay.nn.global_avg_pool2d, np.mean)
+@tvm.testing.uses_gpu
def test_pool1d():
def _test_pool1d(opfunc, pool_size=(2,), strides=(2,), padding=(0, 0)):
data = np.random.uniform(size=dshape).astype(dtype)
ref_res = tvm.topi.testing.pool1d_ncw_python(data, (2,), (2,),
(0, 0), (1, 3, 16), pool_type, False)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
_test_pool1d(relay.nn.avg_pool1d, pool_size=2, strides=2, padding=0)
+@tvm.testing.uses_gpu
def test_pool3d():
def _test_pool3d(opfunc,
data = np.random.uniform(size=dshape).astype(dtype)
ref_res = tvm.topi.testing.pool3d_ncdhw_python(data, pool_size, strides,
padding, out_shape, pool_type, False)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
_test_pool3d(relay.nn.avg_pool3d, pool_size=2, padding=0, strides=2)
+@tvm.testing.uses_gpu
def test_avg_pool2d_no_count_pad():
kh, kw = (4, 4)
sh, sw = (2, 2)
ref_res = np.maximum(b_np, 0.0)
data = a_np
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_flatten_infer_type():
d1, d2, d3, d4 = te.size_var("d1"), te.size_var("d2"), te.size_var("d3"), te.size_var("d4")
x = relay.var("x", relay.TensorType((d1, d2, d3, d4), "float32"))
x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
ref_res = x_data.flatten().reshape(o_shape)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data)
op_res2 = intrp2.evaluate(func)(x_data)
tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_pad_infer_type():
# entirely concrete case
n, c, h, w = 1, 2, 3, 4
yy = run_infer_type(y)
assert yy.checked_type == relay.TensorType((n + 2, 6, 9, w + 8), "float32")
+@tvm.testing.uses_gpu
def test_pad_run():
def _test_run(dtype):
dshape = (4, 10, 7, 7)
func = relay.Function([x], y)
data = np.random.uniform(size=dshape).astype(dtype)
ref_res = np.pad(data, ((1, 1), (2, 2), (3, 3), (4, 4)), 'constant')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
_test_run('float32')
_test_run('int32')
+@tvm.testing.uses_gpu
def test_lrn():
n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", shape=(n, c , h, w))
x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
ref_res = tvm.topi.testing.lrn_python(x_data, size, axis, bias, alpha, beta)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data)
op_res2 = intrp2.evaluate(func)(x_data)
tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_l2_normalize():
n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", shape=(n, c , h, w))
x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
ref_res = tvm.topi.testing.l2_normalize_python(x_data, eps, axis)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data)
return np.reshape(data, (shape[0], target_dim))
+@tvm.testing.uses_gpu
def test_batch_flatten():
t1 = relay.TensorType((5, 10, 5))
x = relay.Var("x", t1)
data = np.random.rand(5, 10, 5).astype(t1.dtype)
ref_res = batch_flatten(data)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(data)
np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
else:
ref = tvm.topi.testing.bilinear_resize_python(data, (int(round(h*scale_h)),
int(round(w*scale_w))), layout)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
executor = relay.create_executor("graph", ctx=ctx, target=target)
out = executor.evaluate(func)(data)
tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_upsampling():
_test_upsampling("NCHW", "nearest_neighbor")
_test_upsampling("NCHW", "bilinear", True)
ref = tvm.topi.testing.trilinear_resize3d_python(data, (int(round(d*scale_d)),\
int(round(h*scale_h)),\
int(round(w*scale_w))), layout)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
executor = relay.create_executor("graph", ctx=ctx, target=target)
out = executor.evaluate(func)(data)
tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5, atol=1e-5)
+@tvm.testing.uses_gpu
def test_upsampling3d():
_test_upsampling3d("NCDHW", "nearest_neighbor")
_test_upsampling3d("NCDHW", "trilinear", "align_corners")
_test_upsampling3d("NDHWC", "nearest_neighbor")
_test_upsampling3d("NDHWC", "trilinear", "align_corners")
+@tvm.testing.uses_gpu
def test_conv2d_int8_intrinsics():
def _compile(ic, oc, target, data_layout, kernel_layout, dtypes):
input_dtype, weight_dtype, output_dtype = dtypes
assert "vpmulld" in asm and "vpadd" in asm
+@tvm.testing.uses_gpu
def test_depthwise_conv2d_int8():
input_dtype = 'uint8'
weight_dtype = 'int8'
graph, lib, params = relay.build(func, target, params=parameters)
+@tvm.testing.uses_gpu
def test_bitserial_conv2d_infer_type():
# Basic shape test with ambiguous batch.
n, c, h, w = te.size_var("n"), 32, 224, 224
(n, 32, 222, 222), "int16")
+@tvm.testing.uses_gpu
def test_bitpack_infer_type():
# Test axis packing shape inference.
o, i, h, w = 32, 32, 128, 128
# TODO(@jwfromm): Need to add bitserial_conv2d & bitpack run test cases
+@tvm.testing.uses_gpu
def test_correlation():
def _test_correlation(data_shape, kernel_size, max_displacement, stride1, stride2, padding, is_multiply, dtype='float32'):
data1 = relay.var("data1", relay.ty.TensorType(data_shape, dtype))
data2_np = np.random.uniform(size=data_shape).astype(dtype)
ref_res = tvm.topi.testing.correlation_nchw_python(data1_np, data2_np, kernel_size, max_displacement, stride1, stride2, padding, is_multiply)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data1_np, data2_np)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
from tvm import relay
from tvm.error import TVMError
from tvm.relay import create_executor, transform
-from tvm.relay.testing import ctx_list, check_grad, run_infer_type
+from tvm.relay.testing import check_grad, run_infer_type
+import tvm.testing
def test_zeros_ones():
(100, t, n), "float32")
+@tvm.testing.uses_gpu
def test_transpose():
def verify_transpose(dshape, axes):
x = relay.var("x", relay.TensorType(dshape, "float32"))
x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
ref_res = np.transpose(x_data, axes=axes)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
assert yy.checked_type == relay.TensorType(
(n, t, 2000), "float32")
+@tvm.testing.uses_gpu
def test_reshape():
def verify_reshape(shape, newshape, oshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
check_grad(func)
x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
ref_res = np.reshape(x_data, oshape)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
assert zz.checked_type == relay.TensorType((1, 8, 8), "float32")
+@tvm.testing.uses_gpu
def test_reshape_like():
def verify_reshape_like(shape, oshape):
x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
func = relay.Function([x, y], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
verify_take((d1, d2), (d3, d4, d5), (d1, d3, d4, d5), 1)
verify_take((d1, d2, d3, d4), (d5, d6), (d1, d2, d5, d6, d4), -2)
+@tvm.testing.uses_gpu
def test_take():
def verify_take(src_shape, indices_src, axis=None, mode="clip"):
src_dtype = "float32"
np_mode = "raise" if mode == "fast" else mode
ref_res = np.take(x_data, indices=indices_src, axis=axis, mode=np_mode)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, indices_src)
assert yy.checked_type == relay.TensorType((1, 2), "int8")
+@tvm.testing.uses_gpu
def test_full():
def verify_full(fill_value, src_shape, dtype):
x = relay.var("x", relay.scalar_type(dtype))
z = relay.full(x, src_shape, dtype)
func = relay.Function([x], z)
ref_res = np.full(src_shape, fill_value)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(np.array(fill_value, dtype))
assert yy.checked_type == relay.TensorType((n, c, h, w), "float32")
+@tvm.testing.uses_gpu
def test_full_like():
def verify_full_like(base, fill_value, dtype):
x_data = np.random.uniform(low=-1, high=1, size=base).astype(dtype)
func = relay.Function([x, y], z)
ref_res = np.full_like(x_data, fill_value)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, np.array(fill_value, dtype))
verify_full_like((1, 1), 44.0, "float32")
+@tvm.testing.uses_gpu
def test_infer_type_leaky_relu():
n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
ref_res = np.where(x_data > 0, x_data, x_data * 0.1)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data)
else:
ref_res = (x_data < 0) * (x_data * a_data.reshape(1, 1, 3)) + (x_data>=0) * x_data
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data, a_data)
tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_infer_type_prelu():
n, c , h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
verify_infer_type_prelu((n, c, h, w), (c,), 1, (n, c, h, w))
verify_infer_type_prelu((1, 2, 2, 3), None, 3, (1, 2, 2, 3))
+@tvm.testing.uses_gpu
def test_arange():
def verify_arange(start, stop, step):
dtype = "float32"
ref_res = np.arange(start, stop, step).astype(dtype)
func = relay.Function([], x)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)()
# arange doesn't support floating point right now, see type relation
# verify_arange(20, 1, -1.5)
+@tvm.testing.uses_gpu
def test_meshgrid():
def verify_meshgrid(lengths, indexing="ij"):
input_vars = []
# Get ref
ref_res = np.meshgrid(*input_data, indexing=indexing)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(*input_data)
# Length 0 signifies scalar.
verify_meshgrid([3, 5, 0])
+@tvm.testing.uses_gpu
def test_tile():
def verify_tile(dshape, reps):
x = relay.var("x", relay.TensorType(dshape, "float32"))
x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
ref_res = np.tile(x_data, reps=reps)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
verify_tile((2, 3, 4), (1, 2))
verify_tile((2, 3), (3, 2, 1))
+@tvm.testing.uses_gpu
def test_repeat():
def verify_repeat(dshape, repeats, axis):
x = relay.Var("x", relay.TensorType(dshape, "float32"))
func = relay.Function([x], relay.repeat(x, repeats, axis))
data = np.random.uniform(size=dshape).astype("float32")
ref_res = np.repeat(data, repeats, axis)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(data)
verify_repeat((3, 10), 2, -1)
verify_repeat((3, 2, 4), 3, 1)
+@tvm.testing.uses_gpu
def test_stack():
def verify_stack(dshapes, axis):
y = []
x_data = [np.random.normal(size=shape).astype("float32") for shape in dshapes]
ref_res = np.stack(x_data, axis=axis)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(*x_data)
verify_stack([(2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4)], -1)
+@tvm.testing.uses_gpu
def test_reverse():
def verify_reverse(dshape, axis):
x = relay.var("x", relay.TensorType(dshape, "float32"))
func = relay.Function([x], z)
x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
ref_res = np.flip(x_data, axis)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
verify_reverse((2, 3, 4), -1)
+@tvm.testing.uses_gpu
def test_reverse_sequence():
def verify_reverse_sequence(x_data, seq_lengths, batch_axis, seq_axis, ref_res):
seq_lengths_data = np.array(seq_lengths).astype("int32")
assert zz.checked_type == x.type_annotation
func = relay.Function([x], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
verify_scatter_add((16, 16, 4, 5), (16, 16, 4, 5), 3)
+@tvm.testing.uses_gpu
def test_gather():
def verify_gather(data, axis, indices, ref_res):
data = np.asarray(data, dtype='float32')
func = relay.Function([d, i], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(data, indices)
[-0.5700, 0.1558, -0.5700, 0.1558]]])
+@tvm.testing.uses_gpu
def test_gather_nd():
def verify_gather_nd(xshape, yshape, y_data):
x = relay.var("x", relay.TensorType(xshape, "float32"))
x_data = np.random.uniform(size=xshape).astype("float32")
ref_res = x_data[tuple(y_data)]
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
_verify_infiniteness_ops(relay.isinf, np.isinf)
+@tvm.testing.uses_gpu
def test_unravel_index():
def verify_unravel_index(indices, shape, dtype):
x_data = np.array(indices).astype(dtype)
func = relay.Function([x, y], z)
ref_res = np.unravel_index(x_data, y_data)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
# output which is in line with TensorFlow
# verify_unravel_index([0, 1, 2, 5], [2, 2], dtype)
+@tvm.testing.uses_gpu
def test_sparse_to_dense():
def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape, xpected):
sparse_indices_data = np.array(sparse_indices)
assert zz.checked_type == relay.ty.TensorType(output_shape, str(sparse_values_data.dtype))
func = relay.Function(args, d)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
if default_value is None:
import numpy as np
from tvm import relay
from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
import tvm.topi.testing
+import tvm.testing
+@tvm.testing.uses_gpu
def test_binary_op():
def check_binary_op(opfunc, ref):
n = te.size_var("n")
ref_res = ref(x_data, y_data)
func = relay.Function([x, y], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
check_binary_op(opfunc, ref)
+@tvm.testing.uses_gpu
def test_cmp_type():
for op, ref in ((relay.greater, np.greater),
(relay.greater_equal, np.greater_equal),
ref_res = ref(x_data, y_data)
func = relay.Function([x, y], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
+@tvm.testing.uses_gpu
def test_binary_int_broadcast_1():
for op, ref in [(relay.right_shift, np.right_shift),
(relay.left_shift, np.left_shift)]:
func = relay.Function([x, y], z)
ref_res = ref(x_data, y_data)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
+@tvm.testing.uses_gpu
def test_binary_int_broadcast_2():
for op, ref in [(relay.maximum, np.maximum),
(relay.minimum, np.minimum),
func = relay.Function([x, y], z)
ref_res = ref(x_data, y_data)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, y_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
+@tvm.testing.uses_gpu
def test_where():
shape = (3, 4)
dtype = "float32"
x = np.random.uniform(size=shape).astype(dtype)
y = np.random.uniform(size=shape).astype(dtype)
ref_res = np.where(condition, x, y)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(condition, x, y)
return
ref_res = ref_func(x_data + 0, axis=axis, keepdims=keepdims)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data)
op_res2 = intrp2.evaluate(func)(x_data)
tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_reduce_functions():
def _with_keepdims(func):
def _wrapper(data, axis=None, keepdims=False):
ref_mean = np.mean(x_data, axis=axis, dtype=dtype, keepdims=keepdims)
ref_res = ref_func(x_data, axis=axis, dtype=dtype, keepdims=keepdims)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x_data)
tvm.testing.assert_allclose(op_res2[0].asnumpy(), ref_mean, rtol=1e-5)
tvm.testing.assert_allclose(op_res2[1].asnumpy(), ref_res, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_mean_var_std():
for func in [[relay.mean_variance, np.var],
[relay.mean_std, np.std]]:
verify_mean_var_std(func, (128, 24, 128), (0, 2), True)
+@tvm.testing.uses_gpu
def test_strided_slice():
def verify(dshape, begin, end, strides, output, slice_mode="end",
attr_const=True, test_ref=True, dtype="int32"):
if not test_ref:
return
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
verify((3, 4, 3), [1, 0, 0], [-1, 2, 3], [1, 1, 1],
(2, 2, 3), slice_mode="size", test_ref=True)
+@tvm.testing.uses_gpu
def test_strided_set():
def verify(dshape, begin, end, strides, vshape, test_ref=True):
x = relay.var("x", relay.TensorType(dshape, "float32"))
v_data = np.random.uniform(size=vshape).astype("float32")
ref_res = tvm.topi.testing.strided_set_python(
x_data, v_data, begin, end, strides)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("graph", ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, v_data)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
from tvm import te
from tvm import relay
from tvm.relay import transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
import tvm.topi.testing
+import tvm.testing
def test_resize_infer_type():
zz = run_infer_type(z)
assert zz.checked_type == relay.TensorType((n, c, 100, 200), "int8")
+@tvm.testing.uses_gpu
def test_resize():
def verify_resize(dshape, scale, method, layout, coord_trans):
if layout == "NHWC":
assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
func = relay.Function([x], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
zz = run_infer_type(z)
assert zz.checked_type == relay.TensorType((n, c, 10, 10, 20), "int8")
-def test_resize3d():
+@tvm.testing.parametrize_targets
+def test_resize3d(target, ctx):
def verify_resize(dshape, scale, method, layout):
if layout == "NDHWC":
size = (dshape[1] * scale, dshape[2] * scale, dshape[3] * scale)
assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
func = relay.Function([x], z)
- for target, ctx in ctx_list():
- for kind in ["graph", "debug"]:
- intrp = relay.create_executor(kind, ctx=ctx, target=target)
- op_res = intrp.evaluate(func)(x_data)
- tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-4)
+ for kind in ["graph", "debug"]:
+ intrp = relay.create_executor(kind, ctx=ctx, target=target)
+ op_res = intrp.evaluate(func)(x_data)
+ tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-4)
for method in ["trilinear", "nearest_neighbor"]:
for layout in ["NDHWC", "NCDHW"]:
verify_resize((1, 4, 4, 4, 4), 2, method, layout)
+@tvm.testing.uses_gpu
def test_crop_and_resize():
def verify_crop_and_resize(img_shape, boxes, box_indices, crop_size,
layout, method, extrapolation_value=0.0):
assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
func = relay.Function([img, bx, bx_idx], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(image_data, boxes, box_indices)
verify_crop_and_resize((5, 3, 255, 255), boxes_nchw, indices_nchw,
size_nchw, 'NCHW', method, 0.1)
+@tvm.testing.uses_gpu
def test_multibox_prior():
def get_ref_result(dshape, sizes=(1.0,),
ratios=(1.0,), steps=(-1.0, -1.0),
data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
func = relay.Function([x], z)
func = run_infer_type(func)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5)
verify_multibox_prior(x, dshape, ref_res, clip=False, check_type_only=True)
+@tvm.testing.uses_gpu
def test_get_valid_counts():
def verify_get_valid_counts(dshape, score_threshold, id_index, score_index):
dtype = "float32"
assert "score_threshold" in z.astext()
func = relay.Function([x], z.astuple())
func = run_infer_type(func)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp = relay.create_executor("debug", ctx=ctx, target=target)
out = intrp.evaluate(func)(np_data)
tvm.testing.assert_allclose(out[0].asnumpy(), np_out1, rtol=1e-3, atol=1e-04)
verify_get_valid_counts((16, 500, 5), 0.95, -1, 0)
+@tvm.testing.uses_gpu
def test_non_max_suppression():
def verify_nms(x0_data, x1_data, x2_data, x3_data, dshape, ref_res,
ref_indices_res, iou_threshold=0.5, force_suppress=False,
func = run_infer_type(func)
func_indices = relay.Function([x0, x1, x2, x3], z_indices)
func_indices = run_infer_type(func_indices)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(x0_data, x1_data, x2_data, x3_data)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5)
np_indices_result, top_k=2)
+@tvm.testing.uses_gpu
def test_multibox_transform_loc():
def test_default_value():
num_anchors = 3
nms = relay.vision.non_max_suppression(mtl[0], mtl[1], mtl[0], return_indices=False)
func = relay.Function([cls_prob, loc_pred, anchors], nms)
func = run_infer_type(func)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(np_cls_prob, np_loc_preds,
np_anchors)
test_threshold()
+@tvm.testing.uses_gpu
def test_roi_align():
def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio):
data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
ref_res = tvm.topi.testing.roi_align_nchw_python(np_data, np_rois, pooled_size=pooled_size,
spatial_scale=spatial_scale,
sample_ratio=sample_ratio)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(np_data, np_rois)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-4)
verify_roi_align((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2)
+@tvm.testing.uses_gpu
def test_roi_pool():
def verify_roi_pool(data_shape, rois_shape, pooled_size, spatial_scale):
data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
np_rois[:, 0] = np.random.randint(low = 0, high = batch, size = num_roi).astype('float32')
ref_res = tvm.topi.testing.roi_pool_nchw_python(np_data, np_rois, pooled_size=pooled_size,
spatial_scale=spatial_scale)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(np_data, np_rois)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-4)
verify_roi_pool((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5)
+@tvm.testing.uses_gpu
def test_proposal():
def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs):
cls_prob = relay.var("cls_prob", relay.ty.TensorType(np_cls_prob.shape, "float32"))
func = relay.Function([cls_prob, bbox_pred, im_info], z)
func = run_infer_type(func)
for target in ['llvm', 'cuda']:
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
print("Skip test because %s is not enabled." % target)
continue
ctx = tvm.context(target, 0)
verify_yolo_reorg((n, c, 20, 20), 10, (n, c*10*10, 2, 2))
verify_yolo_reorg((n, c, h, w), 2, (n, c*2*2, idxd(h, 2), idxd(w, 2)))
+@tvm.testing.uses_gpu
def test_yolo_reorg():
def verify_yolo_reorg(shape, stride):
x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
func = relay.Function([x], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
verify_yolo_reorg((1, 4, 6, 6), 2)
+@tvm.testing.uses_gpu
def test_deformable_conv2d():
def test_infer_type(batch, in_channel, size, out_channel, deformable_groups, groups):
data_shape = (batch, in_channel, size, size)
kernel = np.random.uniform(size=kernel_shape).astype(dtype)
ref_res = tvm.topi.testing.deformable_conv2d_nchw_python(data, offset, kernel, stride=(1, 1), padding=(1, 1), dilation=(1, 1), deformable_groups=deformable_groups, groups=groups)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp1 = relay.create_executor(kind, ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data, offset, kernel)
test_run(2, 4, 16, 4, 4, 1)
+@tvm.testing.uses_gpu
def test_depth_to_space():
def verify_depth_to_space(dshape, block_size, layout, mode):
if layout == "NHWC":
assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
func = relay.Function([x], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
verify_depth_to_space((1, 4, 4, 4), 2, layout, mode)
+@tvm.testing.uses_gpu
def test_space_to_depth():
def verify_space_to_depth(dshape, block_size, layout):
if layout == "NHWC":
assert zz.checked_type == relay.TensorType(ref_res.shape, "float32")
func = relay.Function([x], z)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
(n, 10, 217, 217), "float32")
+@tvm.testing.uses_gpu
def test_dilation2d_run():
def run_test_dilation2d(indata, kernel, out,
dtype='float32',
**attrs)
func = relay.Function([x, w], y)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if target in except_targets:
continue
intrp = relay.create_executor("graph", ctx=ctx, target=target)
data_layout='NHWC', kernel_layout='HWI')
+@tvm.testing.uses_gpu
def test_affine_grid():
def verify_affine_grid(num_batch, target_shape):
dtype = 'float32'
data_np = np.random.uniform(size=data_shape).astype(dtype)
ref_res = tvm.topi.testing.affine_grid_python(data_np, target_shape)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp1 = relay.create_executor(kind, ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data_np)
verify_affine_grid(4, (16, 32))
+@tvm.testing.uses_gpu
def test_grid_sample():
def verify_grid_sample(data_shape, grid_shape):
dtype = 'float32'
grid_np = np.random.uniform(size=grid_shape, low=-1.5, high=1.5).astype(dtype)
ref_res = tvm.topi.testing.grid_sample_nchw_python(data_np, grid_np, method='bilinear')
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp1 = relay.create_executor(kind, ctx=ctx, target=target)
op_res1 = intrp1.evaluate(func)(data_np, grid_np)
import tvm
from tvm import te
from tvm import relay
-from tvm.relay.testing import ctx_list
+import tvm.testing
+@tvm.testing.uses_gpu
def test_argsort():
def verify_argsort(shape, axis, is_ascend, dtype):
x = relay.var("x", relay.TensorType(shape, "float32"))
else:
ref_res = np.argsort(-x_data, axis=axis)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data)
verify_argsort((3, 5, 6), axis=-1, is_ascend=False, dtype=dtype)
+@tvm.testing.uses_gpu
def test_topk():
def verify_topk(k, axis, ret_type, is_ascend, dtype):
shape = (20, 100)
np_values[i, :] = np_data[i, np_indices[i, :]]
np_indices = np_indices.astype(dtype)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(np_data)
from tvm import relay
from tvm.relay import transform, analysis
from tvm.relay.testing.temp_op_attr import TempOpAttr
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
import numpy as np
+import tvm.testing
def run_opt_pass(expr, passes):
passes = passes if isinstance(passes, list) else [passes]
assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
+@tvm.testing.uses_gpu
def test_alter_layout_strided_slice():
"""Test rewriting strided_slice during alter_iop_layout"""
def before():
mod_before['main'] = a
mod_new['main'] = b
with relay.build_config(opt_level=3):
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "debug", "vm"]:
ex_before = relay.create_executor(kind, mod=mod_before, ctx=ctx, target=target)
ex_new = relay.create_executor(kind, mod=mod_new, ctx=ctx, target=target)
from tvm.contrib import graph_runtime
from tvm.relay.expr_functor import ExprMutator
from tvm.relay import transform
-
+import tvm.testing
def run_opt_pass(expr, passes):
passes = passes if isinstance(passes, list) else [passes]
tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
-def test_check_run():
- for dev, tgt in [("opencl", "opencl"), ("cuda", "cuda"),
- ("opencl", str(tvm.target.intel_graphics()))]:
- if not tvm.runtime.enabled(dev):
- print("Skip test because %s is not enabled." % dev)
- continue
- run_fusible_network(dev, tgt)
- run_unpropagatable_graph(dev, tgt)
+@tvm.testing.requires_opencl
+def test_check_run_opencl():  # runs both network checks with device and target set to "opencl"
+ dev = "opencl"  # device name handed to both run_* helpers
+ tgt = "opencl"  # target string handed to both run_* helpers
+ run_fusible_network(dev, tgt)
+ run_unpropagatable_graph(dev, tgt)
-def test_tuple_get_item():
+@tvm.testing.requires_opencl
+def test_check_run_opencl_intel():  # runs both network checks on "opencl" with the intel_graphics target
+ dev = "opencl"  # device name handed to both run_* helpers
+ tgt = str(tvm.target.intel_graphics())  # target string is the str() of the intel_graphics target object
+ run_fusible_network(dev, tgt)
+ run_unpropagatable_graph(dev, tgt)
+
+
+@tvm.testing.requires_cuda
+def test_check_run_cuda():  # runs both network checks with device and target set to "cuda"
dev = "cuda"
- if not tvm.runtime.enabled(dev):
- print("Skip test because %s is not enabled." % dev)
- return
+ tgt = "cuda"  # target string handed to both run_* helpers
+ run_fusible_network(dev, tgt)
+ run_unpropagatable_graph(dev, tgt)
+
+@tvm.testing.requires_cuda
+def test_tuple_get_item():
+ dev = "cuda"
cpu_ctx = tvm.cpu(0)
gpu_ctx = tvm.context(dev)
from tvm import relay
from tvm.relay import transform
from tvm.relay.build_module import bind_params_by_name
-from tvm.relay.testing import run_infer_type, create_workload, ctx_list
+from tvm.relay.testing import run_infer_type, create_workload
import tvm.topi.testing
+import tvm.testing
def run_opt_pass(expr, opt_pass):
assert isinstance(opt_pass, tvm.transform.Pass)
def verify_func(func, data, ref_res, rtol=1e-5, atol=1e-7):
assert isinstance(data, list)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
for kind in ["graph", "vm", "debug"]:
mod = tvm.ir.IRModule.from_expr(func)
intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=rtol, atol=atol)
+@tvm.testing.uses_gpu
def test_dynamic_to_static_reshape():
def verify_reshape(shape, newshape, oshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
verify_reshape((4, 7), (2, 7, 2), (2, 7, 2))
+@tvm.testing.uses_gpu
def test_dynamic_to_static_double_reshape():
def verify_reshape(shape, newshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
verify_reshape((4, 7), (2, 7, 2))
+@tvm.testing.uses_gpu
def test_dynamic_to_static_quad_reshape():
def verify_reshape(shape, newshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
verify_reshape((4, 7), (2, 7, 2))
+@tvm.testing.uses_gpu
def test_dynamic_to_static_tile():
def verify_tile(shape, reps, oshape):
x = relay.var("x", relay.TensorType(shape, "float32"))
verify_tile((4, 7), (4, 2), (16, 14))
+@tvm.testing.uses_gpu
def test_dynamic_to_static_topk():
def verify_topk(k, axis, ret_type, is_ascend, dtype):
shape = (20, 100)
assert isinstance(zz, relay.Call)
assert zz.op == relay.op.get("topk")
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if "llvm" not in target: continue
for kind in ["graph", "vm", "debug"]:
mod = tvm.ir.IRModule.from_expr(func2)
verify_topk(k, axis, ret_type, False, "float32")
+@tvm.testing.uses_gpu
def test_dynamic_to_static_broadcast_to():
def verify_broadcast_to(shape, broadcast_shape):
x = relay.var("x", relay.TensorType(shape, "float32"))
verify_broadcast_to((3, 1), (3, 3))
+@tvm.testing.uses_gpu
def test_dynamic_to_static_zeros_ones():
def verify_ones_zeros(shape, dtype):
for op, ref in [(relay.zeros, np.zeros), (relay.ones, np.ones)]:
verify_ones_zeros((9, 8, 3, 4), 'float32')
+@tvm.testing.uses_gpu
def test_dynamic_to_static_resize():
def verify_resize(shape, scale, method, layout):
if layout == "NHWC":
verify_resize((1, 4, 4, 4), 2, method, layout)
+@tvm.testing.uses_gpu
def test_dynamic_to_static_one_hot():
def _verify(indices_shape, depth, on_value, off_value, axis, dtype):
indices = relay.var("indices", relay.TensorType(indices_shape, "int32"))
_verify((3, 2, 4, 5), 6, 1, 0, 1, "int32")
_verify((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
+@tvm.testing.uses_gpu
def test_dynamic_to_static_full():
def verify_full(fill_value, fill_shape, dtype):
x = relay.var("x", relay.scalar_type(dtype))
func = run_infer_type(relay.Function([x, y], z))
func2 = run_opt_pass(run_opt_pass(func, transform.DynamicToStatic()), transform.InferType())
-
+
zz = func2.body
assert isinstance(zz, relay.Call)
assert zz.op == relay.op.get("full")
ref_res = np.full(fill_shape, fill_value).astype(dtype)
y_data = np.random.uniform(low=-1, high=1, size=fill_shape).astype('int64')
verify_func(func2, [fill_value, y_data], ref_res)
-
+
verify_full(4, (1, 2, 3, 4), 'int32')
verify_full(4.0, (1, 2, 8, 10), 'float32')
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import run_opt_pass
+import tvm.testing
def test_fuse_simple():
assert tvm.ir.structural_equal(m["main"], after)
+@tvm.testing.uses_gpu
def test_fuse_bcast_reduce_scalar():
"""Test fusion case with broadcast and reduction involving scalar"""
orig = before()
m = fuse2(tvm.IRModule.from_expr(orig))
- for tgt, _ in tvm.relay.testing.config.ctx_list():
+ for tgt, ctx in tvm.testing.enabled_targets():
relay.build(m, tgt)
after = run_opt_pass(expected(), transform.InferType())
assert tvm.ir.structural_equal(m["main"], after)
from tvm import relay
from tvm.relay import create_executor, transform
from tvm.relay.testing import rand, run_infer_type
+import tvm.testing
from tvm.testing import assert_allclose
import pytest
from tvm.relay import Function, Call
from tvm.relay import analysis
from tvm.relay import transform as _transform
-from tvm.relay.testing import ctx_list, run_infer_type
+from tvm.relay.testing import run_infer_type
+import tvm.testing
def get_var_func():
assert tvm.ir.structural_equal(func, ref_func)
+@tvm.testing.uses_gpu
def test_module_pass():
shape = (5, 10)
dtype = 'float32'
x_nd = get_rand(shape, dtype)
y_nd = get_rand(shape, dtype)
ref_res = x_nd.asnumpy() + y_nd.asnumpy()
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
exe1 = relay.create_executor("graph", ctx=ctx, target=target)
exe2 = relay.create_executor("debug", ctx=ctx, target=target)
res1 = exe1.evaluate(new_add)(x_nd, y_nd)
assert tvm.ir.structural_equal(mod["main"], mod2["main"])
+@tvm.testing.uses_gpu
def test_function_pass():
shape = (10, )
dtype = 'float32'
# Execute the log function.
x_nd = get_rand(shape, dtype)
ref_res = np.log(x_nd.asnumpy() * 2)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
exe1 = relay.create_executor("graph", ctx=ctx, target=target)
exe2 = relay.create_executor("debug", ctx=ctx, target=target)
res1 = exe1.evaluate(new_log)(x_nd)
assert info.name == "xyz"
+@tvm.testing.uses_gpu
def test_sequential_pass():
shape = (10, )
dtype = 'float32'
x_nd = get_rand(shape, dtype)
y_nd = get_rand(shape, dtype)
ref_res = np.subtract(x_nd.asnumpy() * 2, y_nd.asnumpy() * 2)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
exe1 = relay.create_executor("graph", ctx=ctx, target=target)
exe2 = relay.create_executor("debug", ctx=ctx, target=target)
res1 = exe1.evaluate(new_sub)(x_nd, y_nd)
# Execute the updated abs function.
x_nd = get_rand((5, 10), dtype)
ref_res = np.abs(x_nd.asnumpy() * 2)
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
exe1 = relay.create_executor("graph", ctx=ctx, target=target)
exe2 = relay.create_executor("debug", ctx=ctx, target=target)
res1 = exe1.evaluate(new_abs)(x_nd)
from tvm import runtime
from tvm import relay
from tvm.relay.scope_builder import ScopeBuilder
-from tvm.relay.testing.config import ctx_list
from tvm.relay.prelude import Prelude
from tvm.relay.loops import while_loop
from tvm.relay import testing
+import tvm.testing
def check_result(args, expected_result, mod=None):
"""
"""
# TODO(@zhiics, @icemelon9): Disable the gpu test for now until the heterogeneous support
# is ready
- for target, ctx in ctx_list():
+ for target, ctx in tvm.testing.enabled_targets():
if "cuda" in target:
continue
vm = relay.create_executor('vm', ctx=ctx, target=target, mod=mod)
res = veval(f, x_data)
tvm.testing.assert_allclose(res.asnumpy(), np.split(x_data, 3, axis=0)[0])
+@tvm.testing.uses_gpu
def test_id():
x = relay.var('x', shape=(10, 10), dtype='float64')
f = relay.Function([x], x)
mod["main"] = f
check_result([x_data], x_data, mod=mod)
+@tvm.testing.uses_gpu
def test_op():
x = relay.var('x', shape=(10, 10))
f = relay.Function([x], x + x)
x = relay.op.nn.batch_flatten(x)
return relay.op.min(x, axis=[0, 1])
+@tvm.testing.uses_gpu
def test_cond():
x = relay.var('x', shape=(10, 10))
y = relay.var('y', shape=(10, 10))
# diff
check_result([x_data, y_data], False, mod=mod)
+@tvm.testing.uses_gpu
def test_simple_if():
x = relay.var('x', shape=(10, 10))
y = relay.var('y', shape=(10, 10))
res = vmobj_to_list(vm.evaluate()(False))
assert(res == [1, 0])
+@tvm.testing.uses_gpu
def test_simple_call():
mod = tvm.IRModule({})
sum_up = relay.GlobalVar('sum_up')
mod["main"] = relay.Function([iarg], sum_up(iarg))
check_result([i_data], i_data, mod=mod)
+@tvm.testing.uses_gpu
def test_count_loop():
mod = tvm.IRModule({})
sum_up = relay.GlobalVar('sum_up')
tvm.testing.assert_allclose(result.asnumpy(), i_data)
check_result([i_data], i_data, mod=mod)
+@tvm.testing.uses_gpu
def test_sum_loop():
mod = tvm.IRModule({})
sum_up = relay.GlobalVar('sum_up')
mod["main"] = relay.Function([iarg, aarg], sum_up(iarg, aarg))
check_result([i_data, accum_data], sum(range(1, loop_bound + 1)), mod=mod)
+@tvm.testing.uses_gpu
def test_tuple_fst():
ttype = relay.TupleType([relay.TensorType((1,)), relay.TensorType((10,))])
tup = relay.var('tup', type_annotation=ttype)
mod["main"] = f
check_result([(i_data, j_data)], i_data, mod=mod)
+@tvm.testing.uses_gpu
def test_tuple_second():
ttype = relay.TupleType([relay.TensorType((1,)), relay.TensorType((10,))])
tup = relay.var('tup', type_annotation=ttype)
obj = vmobj_to_list(result)
tvm.testing.assert_allclose(obj, np.array([3,2,1]))
+@tvm.testing.uses_gpu
def test_let_tensor():
sb = relay.ScopeBuilder()
shape = (1,)
mod["main"] = f
check_result([x_data], x_data + 42.0, mod=mod)
+@tvm.testing.uses_gpu
def test_let_scalar():
sb = relay.ScopeBuilder()
res = veval(main)
tvm.testing.assert_allclose(res.asnumpy(), 3.0)
+@tvm.testing.uses_gpu
def test_add_op_scalar():
"""
test_add_op_scalar:
mod["main"] = func
check_result([x_data, y_data], x_data + y_data, mod=mod)
+@tvm.testing.uses_gpu
def test_add_op_tensor():
"""
test_add_op_tensor:
mod["main"] = func
check_result([x_data, y_data], x_data + y_data, mod=mod)
+@tvm.testing.uses_gpu
def test_add_op_broadcast():
"""
test_add_op_broadcast:
comp = relay.vm.VMCompiler()
opt_mod, _ = comp.optimize(mod, target="llvm", params=params)
+@tvm.testing.uses_gpu
def test_loop_free_var():
x = relay.var('x', shape=(), dtype='int32')
i = relay.var('i', shape=(), dtype='int32')
mod["main"] = relay.Function(relay.analysis.free_vars(ret), ret)
check_result(args, expected, mod=mod)
+@tvm.testing.uses_gpu
def test_vm_reshape_tensor():
x_np = np.random.uniform(size=(8, 16)).astype("float32")
x = relay.var("x", shape=(8, 16), dtype="float32")
# under the License.
"""Common utility for topi test"""
-import tvm
-from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import FallbackConfigEntity
-from tvm import topi
-
-def get_all_backend():
- """return all supported target
-
- Returns
- -------
- targets: list
- A list of all supported targets
- """
- return ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx',
- 'llvm -device=arm_cpu', 'opencl -device=mali', 'aocl_sw_emu']
class Int8Fallback(autotvm.FallbackContext):
def _query_inside(self, target, workload):
import tvm
from tvm import te
from tvm import topi
+import tvm.testing
import tvm.topi.testing
import numpy as np
from tvm.contrib.pickle_memoize import memoize
-from common import get_all_backend
def verify_fifo_buffer(buffer_shape, data_shape, axis, dtype='float32'):
buffer = te.placeholder(buffer_shape, name='buffer', dtype=dtype)
# Get the test data
buffer_np, data_np, out_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print(' Skip because %s is not enabled' % device)
- return
+ def check_device(device, ctx):
print(' Running on target: {}'.format(device))
with tvm.target.create(device):
f(data_tvm, buffer_tvm, out_tvm)
tvm.testing.assert_allclose(out_tvm.asnumpy(), out_np)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_conv1d_integration():
batch_size = 1
# Get the test data
inc_input_np, input_window_np, kernel_np, context_np, output_window_np = get_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print(' Skip because %s is not enabled' % device)
- return
+ def check_device(device, ctx):
print(' Running on target: {}'.format(device))
conv2d_nchw, schedule_conv2d_nchw = tvm.topi.testing.get_conv2d_nchw_implement(device)
tvm.testing.assert_allclose(output_window_tvm.asnumpy(),
output_window_ref_tvm.asnumpy())
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_fifo_buffer():
for ndim in [1, 2, 3, 4, 5, 6]:
for axis in range(ndim):
.format(buffer_shape, data_shape, axis))
verify_fifo_buffer(buffer_shape, data_shape, axis)
+@tvm.testing.uses_gpu
def test_conv1d_integration():
print('Testing FIFO buffer with 1D convolution')
verify_conv1d_integration()
from tvm.topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
-from common import get_all_backend
+import tvm.testing
_batch_matmul_implement = {
"generic": (topi.nn.batch_matmul, topi.generic.schedule_batch_matmul),
# get the test data
a_np, b_np, c_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
fcompute, fschedule = tvm.topi.testing.dispatch(device, _batch_matmul_implement)
f(a, b, c)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_batch_matmul():
verify_batch_matmul(1, 16, 16, 32)
verify_batch_matmul(5, 16, 16, 32)
from tvm import te
from tvm import topi
import tvm.topi.testing
-from common import get_all_backend
def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for target in get_all_backend():
+ for target, ctx in tvm.testing.enabled_targets():
check_device(target)
check_device("sdaccel")
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
foo(lhs_nd, rhs_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
- for target in get_all_backend():
+ for target, ctx in tvm.testing.enabled_targets():
check_device(target)
check_device("sdaccel")
+@tvm.testing.uses_gpu
def test_broadcast_to():
verify_broadcast_to_ele((1,), (10,), topi.broadcast_to)
verify_broadcast_to_ele((), (10,), topi.broadcast_to)
verify_broadcast_to_ele((1, 128, 1, 32), (64, 128, 64, 32), topi.broadcast_to)
+@tvm.testing.uses_gpu
def test_add():
verify_broadcast_binary_ele(
(), (), topi.add, np.add)
(5, 2, 3), (2, 1), topi.add, np.add)
+@tvm.testing.uses_gpu
def test_subtract():
verify_broadcast_binary_ele(
(5, 2, 3), (), topi.subtract, np.subtract)
(1, 32), (64, 32), topi.subtract, np.subtract)
+@tvm.testing.uses_gpu
def test_multiply():
verify_broadcast_binary_ele(
(5, 64, 128), (2, 5, 64, 1), topi.multiply, np.multiply)
+@tvm.testing.uses_gpu
def test_divide():
verify_broadcast_binary_ele(
None, (10,), topi.divide, np.divide, rhs_min=0.0001)
verify_broadcast_binary_ele(
(2, 3, 1, 32), (64, 32), topi.divide, np.divide, rhs_min=0.0001)
+@tvm.testing.uses_gpu
def test_floor_divide():
def _canonical_floor_div(a,b):
return np.floor(a / b)
verify_broadcast_binary_ele(
(2, 3, 64, 32), (64, 32), topi.floor_divide, _canonical_floor_div, rhs_min=0.0001)
+@tvm.testing.uses_gpu
def test_maximum_minmum():
verify_broadcast_binary_ele(
(32,), (64, 32), topi.maximum, np.maximum)
(1, 2, 2, 1, 32), (64, 32), topi.minimum, np.minimum)
+@tvm.testing.uses_gpu
def test_power():
verify_broadcast_binary_ele(
(1, 2, 2), (2,), topi.power, np.power, lhs_min=0.001, rhs_min=0.001, rhs_max=2)
+@tvm.testing.uses_gpu
def test_mod():
verify_broadcast_binary_ele(
(1, 2, 2), (2,), topi.mod, np.mod, lhs_min=0.001, rhs_min=1, dtype="int32")
+@tvm.testing.uses_gpu
def test_floor_mod():
def _canonical_floor_mod(a,b):
return a - np.floor(a / b) * b
verify_broadcast_binary_ele(
(3, 4, 5), (3, 4, 5), topi.floor_mod, _canonical_floor_mod, lhs_min=0.001, rhs_min=1, dtype="float32")
+@tvm.testing.uses_gpu
def test_cmp():
# explicit specify the output type
def greater(x, y):
lhs_min=-3, lhs_max=3, rhs_min=-3, rhs_max=3, dtype='int32')
+@tvm.testing.uses_gpu
def test_shift():
# explicit specify the output type
verify_broadcast_binary_ele(
dtype="int8", rhs_min=0, rhs_max=32)
+@tvm.testing.uses_gpu
def test_logical_single_ele():
def test_apply(
func,
assert (isinstance(B, tvm.tir.PrimExpr))
return
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
test_apply(topi.logical_not, "logical_not", np.logical_not, np.array([True, False, 0, 1]))
test_apply(topi.logical_not, "logical_not", np.logical_not, np.array(np.arange(5) < 3))
+@tvm.testing.uses_gpu
def test_bitwise_not():
def test_apply(
func,
assert (isinstance(B, tvm.tir.PrimExpr))
return
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
test_apply(topi.bitwise_not, "bitwise_not", np.bitwise_not, ())
test_apply(topi.bitwise_not, "bitwise_not", np.bitwise_not, (2, 1, 2))
+@tvm.testing.uses_gpu
def test_logical_binary_ele():
def test_apply(
func,
assert (isinstance(C, tvm.tir.PrimExpr))
return
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(C)
foo(lhs_nd, rhs_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
test_apply(topi.logical_and, "logical_and", np.logical_and, True, False)
test_apply(topi.logical_and, "logical_and", np.logical_and, [True, False], [False, False])
test_apply(topi.logical_xor, "logical_xor", np.logical_xor, [True, False], [False, False])
+@tvm.testing.uses_gpu
def test_bitwise_and():
verify_broadcast_binary_ele(
None, None, topi.bitwise_and, np.bitwise_and,
dtype="int32")
+@tvm.testing.uses_gpu
def test_bitwise_or():
verify_broadcast_binary_ele(
None, None, topi.bitwise_or, np.bitwise_or,
dtype="int32")
+@tvm.testing.uses_gpu
def test_bitwise_xor():
verify_broadcast_binary_ele(
None, None, topi.bitwise_xor, np.bitwise_xor,
import tvm
from tvm import te
from tvm import topi
+import tvm.testing
import tvm.topi.testing
from tvm.topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
-from common import get_all_backend
def verify_clip(N, a_min, a_max, dtype):
A = te.placeholder((N, N), dtype=dtype, name='A')
return a_np, b_np
a_np, b_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_clip():
verify_clip(1024, -127, 127, 'float32')
verify_clip(1024, -127, 127, 'int16')
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
_conv1d_ncw_implement = {
a_np, w_np, b_np = get_ref_data(layout)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
if layout == "NCW":
fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_ncw_implement)
else:
func(a, w, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_conv1d():
for layout in ["NCW", "NWC"]:
# Most basic test case
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
+import tvm.testing
_conv1d_transpose_ncw_implement = {
"generic": (topi.nn.conv1d_transpose_ncw, topi.generic.schedule_conv1d_transpose_ncw),
a_np, w_np, b_np, c_np = get_ref_data()
- def check_device(device):
+ def check_device(device, ctx):
ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
with tvm.target.create(device):
fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_transpose_ncw_implement)
B = fcompute(A, W, stride, padding, A.dtype, output_padding)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_conv1d_transpose_ncw():
verify_conv1d_transpose_ncw(1, 3, 224, 32, 5, 1, 0, (0,))
verify_conv1d_transpose_ncw(1, 3, 224, 32, 7, 1, 2, (0,))
from tvm import te
from tvm import autotvm
from tvm import topi
+import tvm.testing
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.nn.util import get_pad_tuple
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
-
def _transform_data(data, bn):
# NCHW -> NCHW[x]c
batch_size, channel, height, width = data.shape
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
+import tvm.testing
_conv2d_hwcn_implement = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.requires_gpu
def test_conv2d_hwcn():
verify_conv2d_hwcn(1, 256, 32, 256, 3, 1, "SAME")
verify_conv2d_hwcn(1, 256, 32, 256, 3, 1, "SAME")
}
def verify_conv2d_hwnc(batch, in_channel, in_size, num_filter, kernel, stride,
- padding, dilation=1, devices='cuda', dtype='int4'):
+ padding, dilation=1, dtype='int4'):
"""Test the conv2d with tensorcore for hwnc layout"""
pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (kernel, kernel))
padding_sum = pad_top + pad_left + pad_bottom + pad_right
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
if not nvcc.have_tensorcore(ctx.compute_version):
rtol = 1e-3
tvm.testing.assert_allclose(c.asnumpy().transpose((2, 0, 1, 3)), c_np, rtol=rtol)
- check_device(devices)
+ check_device('cuda')
+@tvm.testing.requires_tensorcore
def test_conv2d_hwnc_tensorcore():
"""Test the conv2d with tensorcore for hwnc layout"""
verify_conv2d_hwnc(8, 64, 56, 64, 3, 1, 1, dtype='int8')
from tvm.topi.util import get_const_tuple
from tvm.topi.arm_cpu.conv2d_gemm import is_aarch64_arm
-from common import get_all_backend, Int8Fallback
+from common import Int8Fallback
+import tvm.testing
def compile_conv2d_NHWC_gemm_int8_arm(batch, in_channel, in_size, num_filter, kernel, stride, padding,
dilation=1, add_bias=False, add_relu=False):
device = "llvm --device arm_cpu --mtriple aarch64-linux-gnu"
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Compiling on arm AArch64 target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
check_device(device)
+@tvm.testing.requires_cuda
def test_conv2d_nchw():
with Int8Fallback():
# ResNet18 workloads where channels in / out are multiple of oc_block_factor
from tvm.topi.nn.util import get_pad_tuple
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
+import tvm.testing
def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, add_bias=False, add_relu=False,\
use_cudnn=False):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
func(a, w, c)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)
- for device in get_all_backend():
+ for device, ctx in tvm.testing.enabled_targets():
with autotvm.tophub.context(device): # load tophub pre-tuned parameters
check_device(device)
check_device("cuda -model=unknown -libs=cudnn")
+@tvm.testing.uses_gpu
def test_conv2d_nchw():
# ResNet18 workloads
verify_conv2d_nchw(1, 3, 224, 64, 7, 2, 3)
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-
+import tvm.testing
_conv2d_nhwc_implement = {
a_np, w_np, b_np = get_ref_data()
def check_device(device):
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_conv2d_nhwc():
verify_conv2d_nhwc(1, 256, 32, 256, 3, 1, "SAME")
verify_conv2d_nhwc(4, 128, 16, 128, 5, 2, "SAME")
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
from tvm.contrib import nvcc
from tvm.topi.nn.util import get_pad_tuple
from tvm.topi.util import get_const_tuple
+import tvm.testing
_conv2d_nhwc_tensorcore_implement = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
if not nvcc.have_tensorcore(ctx.compute_version):
check_device(devices)
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
def test_conv2d_nhwc_tensorcore():
"""Test the conv2d with tensorcore for nhwc layout"""
verify_conv2d_nhwc(16, 16, 14, 16, 3, 1, 1)
import tvm.topi.testing
from tvm import te
from tvm.contrib.pickle_memoize import memoize
-from tvm.contrib import nvcc
from tvm.topi.nn.util import get_pad_tuple
from tvm.topi.util import get_const_tuple
+import tvm.testing
_conv2d_nhwc_winograd_tensorcore = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
print("Running on target: %s" % device)
with tvm.target.create(device):
if bgemm == "direct":
check_device(devices)
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
def test_conv2d_nhwc_winograd_direct():
"""Test the conv2d with winograd for nhwc layout"""
# resnet 18 workloads
verify_conv2d_nhwc(2, 48, 56, 48, 3, 1, "SAME", add_relu=True, add_bias=True)
verify_conv2d_nhwc(1, 48, 35, 48, 5, 1, "VALID")
+
+@tvm.testing.requires_cuda
+@tvm.testing.requires_tensorcore
def test_conv2d_nhwc_winograd_tensorcore():
"""Test the conv2d with winograd for nhwc layout"""
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
- if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
- return
verify_conv2d_nhwc(8, 64, 56, 64, 3, 1, 1, bgemm="tensorcore")
verify_conv2d_nhwc(8, 128, 28, 128, 3, 1, 1, bgemm="tensorcore")
verify_conv2d_nhwc(8, 256, 14, 256, 3, 1, 1, bgemm="tensorcore")
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
+import tvm.testing
_conv2d_transpose_nchw_implement = {
a_np, w_np, b_np, c_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_transpose_nchw_implement)
func2(a, w, c)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_conv2d_transpose_nchw():
verify_conv2d_transpose_nchw(1, 3, (224, 224), 1, (1, 1), (1, 1), (0, 0, 0, 0), (0, 0))
verify_conv2d_transpose_nchw(1, 3, (224, 224), 32, (3, 3), (1, 1), (0, 0, 0, 0), (0, 0))
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.nn.util import get_pad_tuple
from tvm.topi.util import get_const_tuple
+import tvm.testing
_conv2d_nchw_winograd_implement = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_conv2d_nchw():
# inception v3 workloads
verify_conv2d_nchw(1, 128, 17, 192, 7, 1, 3, devices=['cuda'])
from tvm import te
from tvm import autotvm
from tvm import topi
+import tvm.testing
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.nn.util import get_pad_tuple3d
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
-
_conv3d_ncdhw_implement = {
"generic": (topi.nn.conv3d_ncdhw, topi.generic.schedule_conv3d_ncdhw),
"cpu": (topi.x86.conv3d_ncdhw, topi.x86.schedule_conv3d_ncdhw),
a_np, w_np, b_np, c_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ncdhw_implement)
with tvm.target.create(device):
func(a, w, c)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)
- for device in get_all_backend():
+ for device, ctx in tvm.testing.enabled_targets():
with autotvm.tophub.context(device): # load tophub pre-tuned parameters
- check_device(device)
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_conv3d_ncdhw():
#3DCNN workloads
verify_conv3d_ncdhw(1, 32, 32, 5, 1, 1, 0)
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
_conv3d_ndhwc_implement = {
"generic": (topi.nn.conv3d_ndhwc, topi.generic.schedule_conv3d_ndhwc),
return a_np, w_np, b_np
a_np, w_np, b_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ndhwc_implement)
with tvm.target.create(device):
func(a, w, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_conv3d_ndhwc():
verify_conv3d_ndhwc(1, 16, 32, 16, 3, 1, "SAME")
verify_conv3d_ndhwc(4, 32, 16, 32, 5, 2, "SAME")
from tvm.contrib import nvcc
from tvm.topi.nn.util import get_pad_tuple3d
from tvm.topi.util import get_const_tuple
+import tvm.testing
_conv3d_ndhwc_tensorcore_implement = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- if not nvcc.have_tensorcore(ctx.compute_version):
- print("skip because gpu does not support Tensor Cores")
- return
print("Running on target: %s" % device)
with tvm.target.create(device):
fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ndhwc_tensorcore_implement)
check_device(devices)
+@tvm.testing.requires_tensorcore
+@tvm.testing.requires_cuda
def test_conv3d_ndhwc_tensorcore():
"""Test the conv3d with tensorcore for ndhwc layout"""
verify_conv3d_ndhwc(16, 16, 14, 16, 3, 1, 1)
import tvm
from tvm import te
from tvm import topi
+import tvm.testing
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
-
_conv3d_transpose_ncdhw_implement = {
"generic": (topi.nn.conv3d_transpose_ncdhw, topi.generic.schedule_conv3d_transpose_ncdhw),
a_np, w_np, b_np, c_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_transpose_ncdhw_implement)
func2(a, w, c)
tvm.testing.assert_allclose(b.asnumpy(), b_np, atol=1e-4, rtol=1e-4)
tvm.testing.assert_allclose(c.asnumpy(), c_np, atol=1e-4, rtol=1e-4)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_conv3d_transpose_ncdhw():
verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 1, (1, 1, 1), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
verify_conv3d_transpose_ncdhw(1, 3, (24, 24, 24), 2, (3, 3, 3), (1, 1, 1), (0, 0, 0, 0, 0, 0), (0, 0, 0))
from tvm import te
from tvm import autotvm
from tvm import topi
+import tvm.testing
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.nn.util import get_pad_tuple3d
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
_conv3d_ncdhw_implement = {
"gpu": (topi.cuda.conv3d_ncdhw_winograd, topi.cuda.schedule_conv3d_ncdhw_winograd),
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.requires_gpu
def test_conv3d_ncdhw():
# Try without depth transformation
#3DCNN workloads
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
-
-
_correlation_implement = {
"generic": (topi.nn.correlation_nchw, topi.generic.schedule_correlation_nchw),
"cuda": (topi.cuda.correlation_nchw, topi.cuda.schedule_correlation_nchw),
a_np, b_np, c_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
fcompute, fschedule = tvm.topi.testing.dispatch(
device, _correlation_implement)
func(a, b, c)
tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_correlation_nchw():
verify_correlation_nchw((1, 3, 10, 10), kernel_size=1, max_displacement=4,
stride1=1, stride2=1, pad_size=4, is_multiply=True)
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
+import tvm.testing
_deformable_conv2d_implement = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_deformable_conv2d_nchw():
verify_deformable_conv2d_nchw(1, 16, 7, 16, 1, 1, 0, deformable_groups=4)
verify_deformable_conv2d_nchw(1, 16, 7, 16, 3, 1, 1, dilation=2, deformable_groups=4)
from tvm.topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
-from common import get_all_backend, Int8Fallback
+from common import Int8Fallback
+import tvm.testing
_dense_implement = {
"generic": [(topi.nn.dense, topi.generic.schedule_dense)],
# get the test data
a_np, b_np, c_np, d_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
for fcompute, fschedule in tvm.topi.testing.dispatch(device, _dense_implement):
with tvm.target.create(device):
f(a, b, c, d)
tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_dense_int8(batch, in_dim, out_dim, use_bias=True):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
print("Skip because int8 intrinsics are not available")
return
check_device(device)
+@tvm.testing.uses_gpu
def test_dense():
verify_dense(1, 1024, 1000, use_bias=True)
verify_dense(1, 1024, 1000, use_bias=False)
verify_dense(128, 1024, 1000, use_bias=True)
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
def test_dense_int8():
with Int8Fallback():
verify_dense_int8(2, 1024, 1000, use_bias=True)
from tvm.topi.util import get_const_tuple
from tvm import te
from tvm.contrib.pickle_memoize import memoize
-from tvm.contrib import nvcc
+import tvm.testing
_dense_implement = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
- if not nvcc.have_tensorcore(ctx.compute_version):
- print("skip because gpu does not support Tensor Cores")
- return
print("Running on target: %s" % device)
for fcompute, fschedule in tvm.topi.testing.dispatch(device, _dense_implement):
with tvm.target.create(device):
tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-3)
- for device in ['cuda']:
- check_device(device)
+ check_device('cuda')
+@tvm.testing.requires_tensorcore
def test_dense_tensorcore():
"""Test cases"""
verify_dense(8, 16, 32, use_bias=True)
import tvm
from tvm import te
from tvm import topi
+import tvm.testing
import tvm.topi.testing
-from common import get_all_backend
-
def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width, layout='NCHW', mode='DCR'):
out_channel = int(in_channel / (block_size * block_size))
a_np = np.transpose(a_np, axes=[0, 2, 3, 1])
b_np = np.transpose(b_np, axes=[0, 2, 3, 1])
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_depth_to_space():
for layout in ['NCHW', 'NHWC']:
for mode in ['DCR', 'CDR']:
from tvm.topi.nn.util import get_pad_tuple
from tvm.contrib.pickle_memoize import memoize
-from common import get_all_backend
+import tvm.testing
_depthwise_conv2d_nchw_implement = {
"generic": [(topi.nn.depthwise_conv2d_nchw, topi.generic.schedule_depthwise_conv2d_nchw)],
dtype = 'float32'
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
impl_list = tvm.topi.testing.dispatch(device, _depthwise_conv2d_nchw_implement)[:]
tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
- for device in get_all_backend():
+ for device, ctx in tvm.testing.enabled_targets():
with autotvm.tophub.context(device): # load tophub pre-tuned parameters
- check_device(device)
+ check_device(device, ctx)
def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding, dilation=1):
dtype = 'float32'
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
fcompute, fschedule = tvm.topi.testing.dispatch(device, _depthwise_conv2d_nhwc_implement)
tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
- for device in get_all_backend():
+ for device, ctx in tvm.testing.enabled_targets():
with autotvm.tophub.context(device): # load tophub pre-tuned parameters
- check_device(device)
+ check_device(device, ctx)
def _transform_data(data, bn):
# NCHW -> NCHW[x]c
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_depthwise_conv2d():
# mobilenet workloads
depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, 1, "SAME")
from tvm.topi.nn.util import get_pad_tuple
import tvm.topi.testing
from tvm.topi.cuda.depthwise_conv2d import schedule_depthwise_conv2d_backward_input_nhwc
+import tvm.testing
def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multiplier, filter_h, stride_h, padding_h):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device("vulkan")
check_device("nvptx")
+@tvm.testing.requires_gpu
def test_topi_depthwise_conv2d_backward_input_nhwc():
verify_depthwise_conv2d_back_input(16, 256, 56, 1, 3, 1, 1)
verify_depthwise_conv2d_back_input(16, 256, 56, 2, 3, 1, 1)
from tvm.topi.util import get_const_tuple
from tvm.topi.nn.util import get_pad_tuple
from tvm.topi.cuda.depthwise_conv2d import schedule_depthwise_conv2d_backward_weight_nhwc
+import tvm.testing
def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multiplier, filter_h, stride_h, padding_h):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device("vulkan")
check_device("nvptx")
+@tvm.testing.requires_gpu
def test_topi_depthwise_conv2d_backward_weight_nhwc():
verify_depthwise_conv2d_back_weight(16, 256, 56, 1, 3, 1, 1)
verify_depthwise_conv2d_back_weight(16, 256, 56, 2, 3, 1, 1)
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
-from common import get_all_backend, Int8Fallback
+from common import Int8Fallback
+import tvm.testing
_group_conv2d_nchw_implement = {
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
check_device(device)
+@tvm.testing.uses_gpu
def test_group_conv2d_nchw():
# ResNeXt-50 workload
verify_group_conv2d_nchw(1, 128, 56, 128, 3, 1, 1, 1, 32)
+@tvm.testing.requires_cuda
def test_group_conv2d_NCHWc_int8():
with Int8Fallback():
# ResNeXt-50 workload
from tvm.topi.util import get_const_tuple
import pytest
-from common import get_all_backend
-
def _transform_data(data, bn):
# NCHW -> NCHW[x]c
batch_size, channel, height, width = data.shape
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(ctx):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
autotvm.GLOBAL_SCOPE.silent = False
+@tvm.testing.uses_gpu
@pytest.mark.skip
def test_conv2d_NCHWc():
# ResNet50 workloads
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
-from common import get_all_backend
def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width,
layout='NCHW', coord_trans="align_corners", method="bilinear"):
scale_w = out_width / in_width
b_np = tvm.topi.testing.upsampling_python(a_np, (scale_h, scale_w), layout)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_resize():
# Scale NCHW
verify_resize(4, 16, 32, 32, 50, 50, 'NCHW')
scale_w = out_width / in_width
b_np = tvm.topi.testing.upsampling3d_python(a_np, (scale_d, scale_h, scale_w), layout)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_resize3d():
# Trilinear
verify_resize3d(4, 8, 16, 16, 16, 25, 25, 25, 'NCDHW')
verify_resize3d(4, 8, 16, 16, 16, 25, 25, 25, 'NDHWC', method="nearest_neighbor")
+@tvm.testing.uses_gpu
def test_crop_and_resize():
def verify_crop_and_resize(image_shape, np_boxes, np_box_indices, np_crop_size, layout='NHWC',
method="bilinear", extrapolation_value=0.0):
baseline_np = tvm.topi.testing.crop_and_resize_python(np_images, np_boxes, np_box_indices,
np_crop_size, layout, method,
extrapolation_value)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(out)
tvm.testing.assert_allclose(tvm_out.asnumpy(), baseline_np, rtol=1e-3, atol=1e-3)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
boxes_1 = np.array([[.2, .3, .7, .9]], dtype="float32")
boxes_2 = np.array([[.2, .3, .7, .9], [0, .1, .8, 1]], dtype="float32")
verify_crop_and_resize((1, 3, 224, 224), boxes_1, indices_1, size_1, layout="NCHW")
+@tvm.testing.uses_gpu
def test_affine_grid():
def verify_affine_grid(num_batch, target_shape):
dtype = "float32"
data_np, out_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(out)
tvm.testing.assert_allclose(
tvm_out.asnumpy(), out_np, rtol=1e-5, atol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
verify_affine_grid(1, (16, 32))
verify_affine_grid(4, (16, 32))
+@tvm.testing.uses_gpu
def test_grid_sample():
def verify_grid_sample(data_shape, grid_shape):
dtype = "float32"
data_np, grid_np, out_np = get_ref_data()
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(out)
tvm.testing.assert_allclose(
tvm_out.asnumpy(), out_np, rtol=1e-5, atol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
verify_grid_sample((4, 4, 16, 32), (4, 2, 8, 8))
verify_grid_sample((4, 4, 16, 32), (4, 2, 32, 32))
from tvm import topi
from tvm.topi.util import get_const_tuple
import tvm.topi.testing
+import tvm.testing
_lrn_schedule = {
"generic": topi.generic.schedule_lrn,
b_np = tvm.topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
def check_device(device):
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
check_device(device)
+@tvm.testing.uses_gpu
def test_lrn():
verify_lrn((1, 3, 5, 5), 3, 1, 1.0, 1.0, 0.5)
verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5)
from tvm import topi
import tvm.topi.testing
from tvm.topi import util
-from common import get_all_backend
def test_util():
assert util.get_const_tuple((x, x)) == (100, 100)
+@tvm.testing.uses_gpu
def test_ewise():
def test_apply(
func,
a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-4
b_np = f_numpy(a_np)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
- for target in get_all_backend():
- check_device(target)
+ for target, ctx in tvm.testing.enabled_targets():
+ check_device(target, ctx)
def test_isnan(
low,
a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-5
b_np = np.isnan(a_np)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
- for target in get_all_backend():
- check_device(target)
+ for target, ctx in tvm.testing.enabled_targets():
+ check_device(target, ctx)
def test_infiniteness_ops(topi_op, ref_op, name):
for dtype in ['float32', 'float64', 'int32', 'int16']:
a_np.ravel()[np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False)] = np.nan
b_np = ref_op(a_np)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
foo = tvm.build(s, [A, B], device, name=name)
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
- for target in get_all_backend():
- check_device(target)
+ for target, ctx in tvm.testing.enabled_targets():
+ check_device(target, ctx)
test_apply(topi.floor, "floor", np.floor, -100, 100)
test_apply(topi.ceil, "ceil", np.ceil, -100, 100)
test_infiniteness_ops(topi.isinf, np.isinf, 'isinf')
+@tvm.testing.uses_gpu
def test_cast():
def verify(from_dtype, to_dtype, low=-100, high=100):
shape = (5, 4)
a_np = a_np - a_np[2, 3]
b_np = a_np.astype(to_dtype)
- for device in get_all_backend():
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- continue
+ for device, ctx in tvm.testing.enabled_targets():
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
with tvm.target.create(device):
import tvm
from tvm import te
from tvm import topi
+import tvm.testing
import tvm.topi.testing
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
+import tvm.testing
_pool_schedule = {
"generic": topi.generic.schedule_pool,
b_np[:, :, i, j] = np.max(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2, 3))
b_np = np.maximum(b_np, 0.0)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s_func = tvm.topi.testing.dispatch(device, _pool_schedule)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=2e-5, atol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True,
add_relu=False):
if add_relu:
pool_grad_np = np.maximum(pool_grad_np, 0.)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s_func = tvm.topi.testing.dispatch(device, _pool_grad_schedule)
f(a, out_grad, pool_grad)
tvm.testing.assert_allclose(pool_grad.asnumpy(), pool_grad_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_pool():
"""test cases of pool"""
verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'avg', False, True)
verify_pool(1, 256, 31, 3, 3, [1, 0, 3, 2], 'max', False)
verify_pool(1, 256, 31, 3, 3, [3, 2, 1, 0], 'max', True)
+@tvm.testing.uses_gpu
def test_pool_grad():
"""test cases of pool_grad"""
verify_pool_grad(1, 256, 32, 3, 2, [1, 1, 1, 1], 'avg', False, False)
b_np = np.max(a_np, axis=axis, keepdims=True)
b_np = np.maximum(b_np, 0.0)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s_func = tvm.topi.testing.dispatch(device, _adaptive_pool_schedule)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_global_pool():
"""test cases of global_pool"""
verify_global_pool((1, 1024, 7, 7), 'avg')
assert len(out_size) == 3
out = topi.nn.adaptive_pool3d(data, out_size, pool_type, layout)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s_func = tvm.topi.testing.dispatch(device, _adaptive_pool_schedule)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), np_out, rtol=4e-5, atol=1e-6)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_adaptive_pool():
"""test cases of adaptive_pool"""
verify_adaptive_pool((1, 3, 224, 224), (1, 1), "max")
ref_np = tvm.topi.testing.pool3d_ncdhw_python(input_np, kernel, stride, padding,
output_shape, pool_type, count_include_pad, ceil_mode)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s_func = tvm.topi.testing.dispatch(device, _pool_schedule)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), ref_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_pool3d():
"""test cases of pool3d"""
verify_pool3d(1, 256, 32, 2, 2, [0, 0, 0, 0, 0, 0], 'avg', False, True)
ref_np = tvm.topi.testing.pool1d_ncw_python(input_np, kernel, stride, padding,
output_shape, pool_type, count_include_pad, ceil_mode)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s_func = tvm.topi.testing.dispatch(device, _pool_schedule)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), ref_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_pool1d():
"""test cases of pool1d"""
verify_pool1d(1, 256, 32, 2, 2, [0, 0], 'avg', False, True)
from tvm import topi
import tvm.topi.testing
-from common import get_all_backend
def _my_npy_argmax(arr, axis, keepdims):
if not keepdims:
else:
raise NotImplementedError
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_reduce_schedule(device)(B)
tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
else:
tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_reduce_map():
verify_reduce_map_ele(in_shape=(32,),
from tvm.topi.util import get_const_tuple
from tvm.contrib.nvcc import have_fp16
-from common import get_all_backend
+import tvm.testing
def verify_relu(m, n, dtype="float32"):
A = te.placeholder((m, n), name='A', dtype=dtype)
a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = a_np * (a_np > 0)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
if dtype == "float16" and device == "cuda" and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because %s does not have fp16 support" % device)
return
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_leaky_relu(m, alpha):
out_np = _prelu_numpy(x_np, w_np)
tvm.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)
+@tvm.testing.uses_gpu
def test_relu():
verify_relu(10, 128, "float32")
verify_relu(128, 64, "float16")
+@tvm.testing.uses_gpu
def test_schedule_big_array():
verify_relu(1024 * 100 , 512)
import tvm
from tvm import te
import tvm.topi.testing
+import tvm.testing
_reorg_schedule = {
"generic": topi.generic.schedule_reorg,
def check_device(device):
'''Cheching devices is enabled or not'''
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
for device in ['llvm', 'cuda']:
check_device(device)
+@tvm.testing.uses_gpu
def test_reorg():
verify_reorg(1, 20, 8, 2)
import tvm
from tvm import te
from tvm import topi
+import tvm.testing
import tvm.topi.testing
import logging
from tvm.topi.util import get_const_tuple
-from common import get_all_backend
_softmax_schedule = {
"generic": topi.generic.schedule_softmax,
"hls": topi.hls.schedule_softmax,
}
-def check_device(A, B, a_np, b_np, device, name):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+def check_device(A, B, a_np, b_np, device, ctx, name):
print("Running on target: %s" % device)
with tvm.target.create(device):
s_func = tvm.topi.testing.dispatch(device, _softmax_schedule)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = tvm.topi.testing.softmax_python(a_np)
- for device in get_all_backend():
- check_device(A, B, a_np, b_np, device, "softmax")
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(A, B, a_np, b_np, device, ctx, "softmax")
def verify_softmax_4d(shape, dtype="float32"):
A = te.placeholder(shape, dtype=dtype, name='A')
b_np = tvm.topi.testing.softmax_python(a_np.transpose(0, 2, 3, 1).reshape(h*w, c))
b_np = b_np.reshape(1, h, w, c).transpose(0, 3, 1, 2)
- for device in get_all_backend():
- check_device(A, B, a_np, b_np, device, "softmax")
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(A, B, a_np, b_np, device, ctx, "softmax")
+@tvm.testing.uses_gpu
def test_softmax():
verify_softmax(32, 10)
verify_softmax(3, 4)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = tvm.topi.testing.log_softmax_python(a_np)
- for device in get_all_backend():
- check_device(A, B, a_np, b_np, device, "log_softmax")
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(A, B, a_np, b_np, device, ctx, "log_softmax")
+@tvm.testing.uses_gpu
def test_log_softmax():
verify_log_softmax(32, 10)
verify_log_softmax(3, 4)
from tvm import te
from tvm import topi
import tvm.topi.testing
+import tvm.testing
_argsort_implement = {
"generic": (topi.argsort, topi.generic.schedule_argsort),
np_indices = np_indices[:, :dshape[axis]]
def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
+ ctx = tvm.context(device, 0)
print("Running on target: %s" % device)
with tvm.target.create(device):
fcompute, fschedule = tvm.topi.testing.dispatch(device, _argsort_implement)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_argsort():
np.random.seed(0)
for axis in [0, -1, 1]:
verify_argsort(axis, False)
+@tvm.testing.uses_gpu
def test_topk():
np.random.seed(0)
for k in [0, 1, 5]:
from tvm import topi
import tvm.topi.testing
-from common import get_all_backend
-
def verify_space_to_depth(block_size, batch, in_channel, in_height, in_width, layout='NCHW'):
out_channel = int(in_channel * (block_size * block_size))
a_np = np.transpose(a_np, axes=[0, 2, 3, 1])
b_np = np.transpose(b_np, axes=[0, 2, 3, 1])
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_space_to_depth():
for layout in ['NCHW', 'NHWC']:
# Simplest possible case
from collections import namedtuple
import time
import scipy.sparse as sp
+import tvm.testing
_sparse_dense_implement = {
"generic": (topi.nn.sparse_dense, topi.generic.schedule_sparse_dense),
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
for device in ['llvm', 'cuda']:
check_device(device)
+@tvm.testing.uses_gpu
def test_sparse_dense_bsr():
M, N, K, BS_R, BS_C, density = 1, 64, 128, 8, 16, 0.9
verify_sparse_dense_bsr(M, N, K, BS_R, BS_C, density, use_relu=True)
verify_sparse_dense_bsr(M, N, K, BS_R, BS_C, density, use_relu=False)
+@tvm.testing.uses_gpu
def test_sparse_dense_bsr_randomized():
for _ in range(20):
BS_R = np.random.randint(1, 16)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
-def test_sparse_dense():
- test_sparse_dense_csr()
- test_sparse_dense_bsr()
- test_sparse_dense_bsr_randomized()
-
if __name__ == "__main__":
test_csrmv()
test_csrmm()
test_dense()
- test_sparse_dense()
+ test_sparse_dense_csr()
+ test_sparse_dense_bsr()
+ test_sparse_dense_bsr_randomized()
test_sparse_transpose_csr()
import tvm.topi.testing
from tvm.contrib.pickle_memoize import memoize
from tvm.contrib.nvcc import have_fp16
+import tvm.testing
def verify_elemwise_sum(num_args, dtype):
shape = (3,5,4)
np_nd = get_ref_data()
def check_device(device):
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
np_nd = get_ref_data()
def check_device(device):
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
def verify_vectorization(n, m, dtype):
def check_device(device):
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
if dtype == "float16" and device == "cuda" and not have_fp16(tvm.gpu(0).compute_version):
for device in ["cuda"]:
check_device(device)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_vectorization():
verify_vectorization(128, 64, "float16")
import tvm.topi.testing
from tvm.contrib.nvcc import have_fp16
-from common import get_all_backend
+import tvm.testing
def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
A = te.placeholder(shape=in_shape, name="A")
B = topi.expand_dims(A, axis, num_newaxis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_reinterpret(in_shape, in_dtype, out_dtype, generator):
A = te.placeholder(shape=in_shape, name="A", dtype=in_dtype)
B = topi.reinterpret(A, out_dtype)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
if in_dtype == "float16" and device == 'cuda' and not have_fp16(ctx.compute_version):
print("Skip because %s does not have fp16 support" % device)
return
foo(data_nd, out_nd)
np.testing.assert_equal(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_transpose(in_shape, axes):
A = te.placeholder(shape=in_shape, name="A")
B = topi.transpose(A, axes)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_reshape(src_shape, dst_shape):
A = te.placeholder(shape=src_shape, name="A")
B = topi.reshape(A, dst_shape)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_squeeze(src_shape, axis):
A = te.placeholder(shape=src_shape, name="A")
B = topi.squeeze(A, axis=axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_concatenate(shapes, axis):
for i, shape in enumerate(shapes):
tensor_l.append(te.placeholder(shape, name="A" + str(i)))
out_tensor = topi.concatenate(a_tuple=tensor_l, axis=axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = get_concat_schedule(device)(out_tensor)
foo(*(data_nds + [out_nd]))
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_stack(shapes, axis):
tensor_l = []
for i, shape in enumerate(shapes):
tensor_l.append(te.placeholder(shape, name="A" + str(i)))
out_tensor = topi.stack(tensor_l, axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(out_tensor)
foo(*(data_nds + [out_nd]))
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_split(src_shape, indices_or_sections, axis):
A = te.placeholder(shape=src_shape, name="A")
tensor_l = topi.split(A, indices_or_sections, axis=axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(tensor_l)
for out_nd, out_npy in zip(out_nds, out_npys):
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_expand_like(in_shape, out_shape, axis):
s = te.create_schedule([C.op])
def check_device(device):
- if not tvm.runtime.enabled(device):
- print("Skip because %s is not enabled" % device)
- return
print("Running on target: %s" % device)
ctx = tvm.context(device, 0)
B = topi.flip(A, axis) + 1
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_reverse_sequence():
def verify_reverse_sequence(in_data, seq_lengths, batch_axis, seq_axis, ref_res):
seq_lengths = np.array(seq_lengths).astype("int32")
B = te.placeholder(shape=seq_lengths.shape, name="B", dtype=str(seq_lengths.dtype))
C = topi.reverse_sequence(A, B, seq_axis, batch_axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(C)
foo(data_nd, seq_lengths_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), ref_res)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
indata = np.array(np.arange(0, 16)).reshape([4, 4]).astype("int32")
result = [[0, 5, 10, 15],
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
var_indices = te.placeholder(shape=indices.shape, dtype=indices.dtype.name, name="indices")
out_tensor = topi.gather(var_data, axis, var_indices)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(out_tensor)
func(data_nd, indices_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_gather_nd(src_shape, indices_src, indices_dtype):
src_dtype = "float32"
indices = te.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
out_tensor = topi.gather_nd(a=A, indices=indices)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(out_tensor)
func(data_nd, indices_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_arange(start, stop, step):
if start is None and step is None:
A = topi.arange(start, stop, step)
a_np = np.arange(start, stop, step)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(A)
f(a_nd)
tvm.testing.assert_allclose(a_nd.asnumpy(), a_np)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_repeat(in_shape, repeats, axis):
A = te.placeholder(shape=in_shape, name="A")
B = topi.repeat(A, repeats, axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_tile(in_shape, reps):
A = te.placeholder(shape=in_shape, name="A")
B = topi.tile(A, reps)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(B)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_where(in_shape):
Cond = te.placeholder(shape=in_shape, name="cond")
A = te.placeholder(shape=in_shape, name="A")
B = te.placeholder(shape=in_shape, name="B")
C = topi.where(Cond, A, B)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_broadcast_schedule(device)(C)
f(cond_nd, x_nd, y_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_one_hot(indices_shape, depth, on_value, off_value, axis, dtype):
indices = te.placeholder(shape=indices_shape, name="indices", dtype="int32")
on_value_const = tvm.tir.const(on_value, dtype)
off_value_const = tvm.tir.const(off_value, dtype)
one_hot_result = topi.transform.one_hot(indices, on_value_const, off_value_const, depth, axis, dtype)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(one_hot_result)
out_topi = out_nd.asnumpy()
tvm.testing.assert_allclose(out_topi, out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_unravel_index(indices, shape, dtype):
Y = te.placeholder(shape=y_data.shape, dtype=dtype, name="Y")
Z = topi.unravel_index(X, Y)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(Z)
foo(datax_nd, datay_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape, xpected):
sparse_indices_data = np.array(sparse_indices)
args = [A, B, C]
D = topi.sparse_to_dense(A, output_shape, B, C)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(D)
tvm.testing.assert_allclose(out_nd.asnumpy(), np.array(xpected))
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
def verify_matrix_set_diag(input_shape, dtype):
diagonal_shape = list(input_shape[:-2])
input = te.placeholder(shape=input_shape, name="input", dtype=dtype)
diagonal = te.placeholder(shape=diagonal_shape, name="diagonal", dtype=dtype)
matrix_set_diag_result = topi.transform.matrix_set_diag(input, diagonal)
- def check_device(device):
+ def check_device(device, ctx):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ # ctx is supplied by the tvm.testing.enabled_targets() loop below; do not rebuild it here.
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(matrix_set_diag_result)
out_topi = out_nd.asnumpy()
tvm.testing.assert_allclose(out_topi, out_npy)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_strided_slice():
verify_strided_slice((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2])
verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1])
verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3])
verify_strided_slice((3, 4, 3), [0, 2, 0], [1, 2, 3])
+@tvm.testing.uses_gpu
def test_strided_set():
verify_strided_set((3, 4, 3), (3, 2, 2), [0, 3, 0], [4, 1, 4], [1, -1, 2])
verify_strided_set((3, 4, 3), (3, 1, 2), [0, 0, 0], [4, -5, 4], [1, -1, 2])
verify_strided_set((3, 4, 3), (2, 3, 3), [1, 1, 0], [4, 4, 3])
verify_strided_set((3, 4, 3), (2, 3, 3), [1, 1], [4, 4, 3])
+@tvm.testing.uses_gpu
def test_expand_dims():
verify_expand_dims((3, 10), (3, 10, 1, 1), 2, 2)
verify_expand_dims((3, 10), (1, 3, 10), -3, 1)
+@tvm.testing.uses_gpu
def test_reinterpret():
verify_reinterpret((1000,), "float32", "int32",
lambda shape: np.random.randn(*shape) * 1000)
lambda shape: np.random.randint(0, 2 ** 32 - 1, size=shape))
+@tvm.testing.uses_gpu
def test_transpose():
verify_transpose((3, 10, 2), (1, 0, 2))
verify_transpose((3, 10, 5), (2, 0, 1))
verify_transpose((3, 10), None)
+@tvm.testing.uses_gpu
def test_reshape():
verify_reshape((1, 2, 3, 4), (2, 3, 4))
verify_reshape((4, 2, 3, 4), (2, 4, 12))
verify_reshape((4, 0), (2, 0, 2))
+@tvm.testing.uses_gpu
def test_where():
verify_where((1, 2, 3, 4))
+# uses_gpu (not requires_gpu): the cuda/opencl part of this test is guarded by
+# tvm.testing.device_enabled, so the test can still run on CPU-only nodes.
+@tvm.testing.uses_gpu
def test_squeeze():
verify_squeeze((1, 2, 3, 4), 0)
verify_squeeze((1, 2, 1, 4), None)
C = te.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')])
for device in ['cuda', 'opencl']:
ctx = tvm.context(device, 0)
- if ctx.exist:
+ if tvm.testing.device_enabled(device):
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(C)
func = tvm.build(s, [A, C])
assert c.asnumpy()[0] == 2
+@tvm.testing.uses_gpu
def test_concatenate():
verify_concatenate([(2,), (2,), (2,)], -1)
verify_concatenate([(2, 3, 4), (2, 2, 4), (2, 5, 4)], 1)
verify_concatenate([(1, 14400), (1, 2400), (1, 640), (1, 240)], 1)
+@tvm.testing.uses_gpu
def test_stack():
verify_stack([(2,), (2,), (2,)], -1)
verify_stack([(2,), (2,), (2,)], 1)
verify_stack([(2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4)], -1)
+@tvm.testing.uses_gpu
def test_split():
verify_split((2, 12, 3), 3, 1)
verify_split((2, 12, 3), [2, 4], 1)
verify_split((10, 12, 24), [5, 7, 9], -1)
+@tvm.testing.uses_gpu
def test_flip():
verify_flip((3, 4, 3), 1)
verify_flip((3, 4, 3), 0)
verify_flip((3, 4, 3), -3)
verify_flip((3, 4, 3), -2)
+@tvm.testing.requires_llvm
def test_expand_like():
verify_expand_like((3,), (2, 3), [0])
verify_expand_like((2,), (2, 3), [1])
verify_expand_like((3, 4), (3, 5, 4), [1])
verify_expand_like((5, 7), (5, 6, 7, 8), [1, 3])
+@tvm.testing.uses_gpu
def test_take():
verify_take((4,), [1])
verify_take((4,), [[0,1,2,3]])
verify_take((3,4), [0, 2], axis=0, mode="fast")
verify_take((3,4), [0, 2], axis=1, mode="fast")
+@tvm.testing.uses_gpu
def test_gather():
verify_gather([[1, 2], [3, 4]], 1, [[0, 0], [1, 0]])
verify_gather(np.random.randn(4, 7, 5), 0, np.random.randint(low=0, high=4, size=(1, 7, 5)))
verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2)))
verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10)))
+@tvm.testing.uses_gpu
def test_gather_nd():
for indices_dtype in ['int32', 'float32']:
verify_gather_nd((4,), [[1.8]], indices_dtype)
verify_gather_nd((2, 3, 4, 5), [[1, 0], [2, 1], [3, 2], [4, 2]],
indices_dtype)
+@tvm.testing.uses_gpu
def test_arange():
verify_arange(None, 20, None)
verify_arange(None, 20, 2)
verify_arange(20, 1, -1)
verify_arange(20, 1, -1.5)
+@tvm.testing.uses_gpu
def test_repeat():
verify_repeat((2,), 1, 0)
verify_repeat((3, 2), 2, 0)
verify_repeat((3, 2, 4), 3, 1)
verify_repeat((1, 3, 2, 4), 4, -1)
+@tvm.testing.uses_gpu
def test_tile():
verify_tile((3, 2), (2, 3))
verify_tile((3, 2, 5), (2,))
verify_tile((3, ), (2, 3, 3))
verify_tile((4, 0), (5,))
+@tvm.testing.uses_gpu
def test_layout_transform():
in_shape = (1, 32, 8, 8)
A = te.placeholder(shape=in_shape, dtype="float32", name="A")
output = np.reshape(output, newshape=(1, 8, 8, 2, 16))
output = np.transpose(output, axes=(0, 3, 1, 2, 4))
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
tvm_input = tvm.nd.array(input, ctx)
tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=B.dtype)
print("Running on target: %s" % device)
f(tvm_input, tvm_output)
tvm.testing.assert_allclose(tvm_output.asnumpy(), output)
- for backend in get_all_backend():
- check_device(backend)
+ for backend, ctx in tvm.testing.enabled_targets():
+ check_device(backend, ctx)
+@tvm.testing.uses_gpu
def test_shape():
in_shape = (8, 7, 13)
dtype = "int32"
input = np.random.uniform(size=in_shape).astype(A.dtype)
output = np.asarray(in_shape).astype(dtype)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
tvm_input = tvm.nd.array(input, ctx)
tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=dtype)
print("Running on target: %s" % device)
f(tvm_input, tvm_output)
tvm.testing.assert_allclose(tvm_output.asnumpy(), output)
- for backend in get_all_backend():
- check_device(backend)
+ for backend, ctx in tvm.testing.enabled_targets():
+ check_device(backend, ctx)
+@tvm.testing.uses_gpu
def test_sequence_mask():
for in_shape in (5, 10), (3, 4, 5, 4):
for axis in [0, 1]:
B_data = np.random.randint(1, max_length, (batch_size,)).astype(np.int32)
C_gt_data = tvm.topi.testing.sequence_mask(A_data, B_data, mask_value, axis)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
tvm_A = tvm.nd.array(A_data, ctx)
tvm_B = tvm.nd.array(B_data, ctx)
tvm_C = tvm.nd.empty(in_shape, ctx=ctx, dtype="float32")
f = tvm.build(s, [A, B, C], device, name="SequenceMask")
f(tvm_A, tvm_B, tvm_C)
tvm.testing.assert_allclose(tvm_C.asnumpy(), C_gt_data)
- for backend in get_all_backend():
- check_device(backend)
+ for backend, ctx in tvm.testing.enabled_targets():
+ check_device(backend, ctx)
+@tvm.testing.uses_gpu
def test_ndarray_size():
in_shape = (5, 11, 7)
dtype = "int32"
input = np.random.uniform(size=in_shape).astype(A.dtype)
output = np.asarray(np.size(input)).astype(dtype)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
tvm_input = tvm.nd.array(input, ctx=ctx)
tvm_output = tvm.nd.empty((), ctx=ctx, dtype=B.dtype)
print("Running on target: %s" % device)
f(tvm_input, tvm_output)
tvm.testing.assert_allclose(tvm_output.asnumpy(), output)
- for backend in get_all_backend():
- check_device(backend)
+ for backend, ctx in tvm.testing.enabled_targets():
+ check_device(backend, ctx)
+@tvm.testing.uses_gpu
def test_where_fusion():
"""integration test that where and zeros should be properly inlined"""
- def check_device(device):
+ def check_device(device, ctx):
with tvm.target.create(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
print("Running on target: %s" % device)
conv2d_compute, conv2d_schedule = tvm.topi.testing.get_conv2d_nchw_implement(device)
data = te.placeholder((2, 1, 2, 4), 'int8', 'data')
s = conv2d_schedule(outs)
- tvm.build(s, [data, w, add], target=backend)
+ # Build for the target passed to check_device rather than relying on the
+ # enclosing loop variable being visible through the closure.
+ tvm.build(s, [data, w, add], target=device)
- for backend in get_all_backend():
- check_device(backend)
+ for backend, ctx in tvm.testing.enabled_targets():
+ check_device(backend, ctx)
+@tvm.testing.uses_gpu
def test_one_hot():
verify_one_hot((3,), 3, 1, 0, -1, "int32")
verify_one_hot((3,), 3, 1.0, 0.0, -1, "float32")
verify_one_hot((3, 2, 4, 5), 6, 1.0, 0.0, 0, "float32")
+@tvm.testing.uses_gpu
def test_unravel_index():
for dtype in ["int32", "int64"]:
verify_unravel_index([0, 1, 2, 3], [2, 2], dtype)
verify_unravel_index(144, [5, 5, 5, 2], dtype)
verify_unravel_index([100, 13, 5], [5, 5, 5, 2], dtype)
+@tvm.testing.uses_gpu
def test_sparse_to_dense():
verify_sparse_to_dense(1, 3, 0, [5], [0, 3, 0, 0, 0]) #scalar
verify_sparse_to_dense([0, 1, 4], [3, 3, 3], 0, [5], [3, 3, 0, 0, 3]) #vector
#sparse_indices should not be > 2d tensor
#verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[3.1, 3.1, 3.1]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
+@tvm.testing.uses_gpu
def test_matrix_set_diag():
for dtype in ['float32', 'int32']:
verify_matrix_set_diag((2, 2), dtype)
import math
from tvm.topi.util import nchw_pack_layout
-from common import get_all_backend
-
def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w,
layout='NCHW', method="nearest_neighbor",
in_batch_block = 0, in_channel_block = 0):
else:
b_np = tvm.topi.testing.upsampling_python(a_np, (scale_h, scale_w), layout)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_upsampling():
# nearest_neighbor - NCHW
verify_upsampling(8, 16, 32, 32, 2.0, 2.0)
else:
b_np = tvm.topi.testing.upsampling3d_python(a_np, (scale_d, scale_h, scale_w), layout)
- def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- return
+ def check_device(device, ctx):
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_injective_schedule(device)(B)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
- for device in get_all_backend():
- check_device(device)
+ for device, ctx in tvm.testing.enabled_targets():
+ check_device(device, ctx)
+@tvm.testing.uses_gpu
def test_upsampling3d():
# nearest_neighbor - NCDHW
verify_upsampling3d(8, 8, 16, 16, 16, 2.0, 2.0, 2.0)
verify_get_shape((2, 3, 32, 32, 16, 8), "OIHW16i8o", "HWO8oI16i", (32, 32, 2, 8, 3, 16))
if __name__ == "__main__":
- test_get_shape()
\ No newline at end of file
+ test_get_shape()
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple
from tvm.topi.vision import ssd, non_max_suppression, get_valid_counts
+import pytest
+import tvm.testing
_get_valid_counts_implement = {
"generic": (topi.vision.get_valid_counts, topi.generic.schedule_get_valid_counts),
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3)
tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3)
- """ Skip this test as it is intermittent
- see https://github.com/apache/incubator-tvm/pull/4901#issuecomment-595040094
for device in ['llvm', 'cuda', 'opencl']:
- # Disable gpu test for now
- if device != "llvm":
- continue
check_device(device)
- """
+@tvm.testing.uses_gpu
+# The trailing space keeps the two adjacent literals from fusing into "intermittent.See".
+@pytest.mark.skip(reason="Skip this test as it is intermittent. "
+ "See https://github.com/apache/incubator-tvm/pull/4901#issuecomment-595040094")
def test_get_valid_counts():
verify_get_valid_counts((1, 1000, 5), 0.5, -1, 0)
verify_get_valid_counts((1, 2500, 6), 0, 0, 1)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
for device in ['llvm', 'cuda', 'opencl']:
check_device(device)
-
+@tvm.testing.uses_gpu
def test_non_max_suppression():
np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80],
[0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79],
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_multibox_prior():
verify_multibox_prior((1, 3, 50, 50))
verify_multibox_prior((1, 3, 224, 224), sizes=(0.5, 0.25, 0.1), ratios=(1, 2, 0.5))
verify_multibox_prior((1, 32, 32, 32), sizes=(0.5, 0.25), ratios=(1, 2), steps=(2, 2), clip=True)
+@tvm.testing.uses_gpu
def test_multibox_detection():
batch_size = 1
num_anchors = 3
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_roi_align():
verify_roi_align(1, 16, 32, 64, 7, 1.0, -1)
verify_roi_align(4, 16, 32, 64, 7, 0.5, 2)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_roi_pool():
verify_roi_pool(1, 4, 16, 32, 7, 1.0)
verify_roi_pool(4, 4, 16, 32, 7, 0.5)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_proposal():
attrs = {'scales': (0.5,),'ratios': (0.5,),
'feature_stride': 16,
from tvm import topi
from tvm import te, auto_scheduler
import tempfile
+import tvm.testing
from test_auto_scheduler_common import matmul_auto_scheduler_test, get_tiled_matmul
def test_record_split_reorder_fuse_annotation():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
A = te.placeholder((512, 512), name='A')
def test_record_compute_at_root_inline_cache_read_write():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
A = te.placeholder((512, 512), name='A')
def test_record_follow_split_follow_fused_split():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
A = te.placeholder((512, 512), name='A')
def test_record_pragma_storage_align_rfactor():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
A = te.placeholder((512, 512), name='A')
def test_measure_local_builder_runner():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
dag, s0 = get_tiled_matmul()
def test_measure_local_builder_rpc_runner():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
dag, s0 = get_tiled_matmul()
tuning_options = auto_scheduler.TuningOptions(num_measure_trials=num_measure_trials,
runner=runner, verbose=1, measure_callbacks=[auto_scheduler.RecordToFile(log_file)])
sch, args = auto_scheduler.auto_schedule(task, search_policy, tuning_options)
+ print("*"*80)
+ print(target)
+ print("*"*80)
inp, res = auto_scheduler.load_best(log_file, workload_key, target)
print("==== Python Code ====")
print()
+@tvm.testing.requires_llvm
def test_workload_registry_search_basic():
- if not tvm.runtime.enabled("llvm"):
- return
# wrap the search in a new thread to avoid the conflict
# between python's multiprocessing and tvm's thread pool
t = PropagatingThread(target=search_common, kwargs={'seed': 944563397})
t.join()
+@tvm.testing.requires_llvm
def test_sketch_search_policy_basic():
- if not tvm.runtime.enabled("llvm"):
- return
# wrap the search in a new thread to avoid the conflict
# between python's multiprocessing and tvm's thread pool
t = PropagatingThread(target=search_common,
t.join()
+@tvm.testing.requires_llvm
def test_sketch_search_policy_xgbmodel():
- if not tvm.runtime.enabled("llvm"):
- return
# wrap the search in a new thread to avoid the conflict
# between python's multiprocessing and tvm's thread pool
t = PropagatingThread(target=search_common,
t.join()
+@tvm.testing.requires_cuda
def test_sketch_search_policy_cuda_rpc_runner():
- if not tvm.runtime.enabled("cuda"):
- return
measure_ctx = auto_scheduler.LocalRPCMeasureContext()
# wrap the search in a new thread to avoid the conflict
# between python's multiprocessing and tvm's thread pool
""" Test sketch generation. """
import tvm
+import tvm.testing
from tvm import te, auto_scheduler
from tvm.auto_scheduler import _ffi_api
from tvm.auto_scheduler.loop_state import Stage
assert sketches[1] != sketches[2]
+@tvm.testing.requires_cuda
def test_cuda_matmul_sketch():
- if not tvm.context("cuda", 0).exist:
- return
-
sketches = generate_sketches(matmul_auto_scheduler_test, (512, 512, 512), 'cuda')
''' 1 multi-level tiling sketch '''
assert len(sketches) == 1
assert_is_tiled(sketches[1].stages[5])
+@tvm.testing.requires_cuda
def test_cuda_conv2d_bn_relu_sketch():
- if not tvm.context("cuda", 0).exist:
- return
-
sketches = generate_sketches(conv2d_nchw_bn_relu_auto_scheduler_test,
(1, 56, 56, 512, 512, 3, 1, 1), 'cuda')
''' 1 multi-level tiling sketch '''
assert_is_tiled(sketches[0].stages[12])
+@tvm.testing.requires_cuda
def test_cuda_max_pool2d_sketch():
- if not tvm.context("cuda", 0).exist:
- return
-
sketches = generate_sketches(max_pool2d_auto_scheduler_test, (1, 56, 56, 512, 0), 'cuda')
''' 1 default sketch '''
assert len(sketches) == 1
assert len(sketches[0].transform_steps) == 0
+@tvm.testing.requires_cuda
def test_cuda_min_sketch():
- if not tvm.context("cuda", 0).exist:
- return
-
sketches = generate_sketches(min_nm_auto_scheduler_test, (10, 1024), 'cuda')
''' 1 cross thread reuction sketch + 1 default sketch '''
assert len(sketches) == 2
assert len(sketches[1].transform_steps) == 0
+@tvm.testing.requires_cuda
def test_cuda_softmax_sketch():
- if not tvm.context("cuda", 0).exist:
- return
-
sketches = generate_sketches(softmax_nm_auto_scheduler_test, (2, 1024), 'cuda')
''' (1 cross thread reuction sketch + 1 default sketch) * (1 cross thread reuction sketch + 1 default sketch) '''
assert len(sketches) == (2 * 2)
assert_compute_at_condition(sketches[3].stages[2], "inlined")
+@tvm.testing.requires_cuda
def test_cuda_conv2d_winograd_sketch():
- if not tvm.context("cuda", 0).exist:
- return
-
sketches = generate_sketches(conv2d_winograd_nhwc_auto_scheduler_test,
(1, 28, 28, 128, 128, 3, 1, 1), 'cuda')
''' 1 multi-level tiling sketch '''
if __name__ == '__main__':
test_gridsearch_tuner()
- test_random_tuner()
\ No newline at end of file
+ test_random_tuner()
wrap_error(Module4, 60)
wrap_error(Module5, 70)
wrap_error(Module6, 77)
- wrap_error(Module7, 84)
\ No newline at end of file
+ wrap_error(Module7, 84)
from tvm import rpc
from tvm.contrib import util, graph_runtime
+@tvm.testing.requires_llvm
def test_graph_simple():
n = 4
A = te.placeholder((n,), name='A')
graph = json.dumps(graph)
def check_verify():
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
mlib = tvm.build(s, [A, B], "llvm", name="myadd")
mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
a = np.random.uniform(size=(n,)).astype(A.dtype)
np.testing.assert_equal(out.asnumpy(), a + 1)
def check_remote():
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
mlib = tvm.build(s, [A, B], "llvm", name="myadd")
server = rpc.Server("localhost")
remote = rpc.connect(server.host, server.port)
params = {'x': x_in}
graph, lib, params = relay.build(func, target="llvm", params=params)
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
mod_shared = graph_runtime.create(graph, lib, tvm.cpu(0))
mod_shared.load_params(relay.save_param_dict(params))
num_mods = 10
from tvm.contrib import util
from tvm.contrib.debugger import debug_runtime as graph_runtime
+@tvm.testing.requires_llvm
def test_graph_simple():
n = 4
A = te.placeholder((n,), name='A')
graph = json.dumps(graph)
def check_verify():
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
mlib = tvm.build(s, [A, B], "llvm", name="myadd")
try:
mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
assert(not os.path.exists(directory))
def check_remote():
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
mlib = tvm.build(s, [A, B], "llvm", name="myadd")
server = rpc.Server("localhost")
remote = rpc.connect(server.host, server.port)
import tvm
from tvm.contrib import graph_runtime
from tvm.contrib.debugger import debug_runtime
+import tvm.testing
def input_shape(mod):
return [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
return out
def test_legacy_compatibility():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def test_cpu():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
out = gmod.get_output(0).asnumpy()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
+@tvm.testing.requires_cuda
+@tvm.testing.requires_gpu
def test_gpu():
- if not tvm.runtime.enabled("cuda"):
- print("Skip because cuda is not enabled")
- return
mod, params = relay.testing.synthetic.get_workload()
with relay.build_config(opt_level=3):
complied_graph_lib = relay.build_module.build(mod, "cuda", params=params)
out = gmod.get_output(0).asnumpy()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
+@tvm.testing.uses_gpu
def test_mod_export():
def verify_cpu_export(obj_format):
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def verify_gpu_export(obj_format):
- if not tvm.runtime.enabled("cuda"):
+ if not tvm.testing.device_enabled("cuda"):
print("Skip because cuda is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def verify_rpc_cpu_export(obj_format):
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def verify_rpc_gpu_export(obj_format):
- if not tvm.runtime.enabled("cuda"):
+ if not tvm.testing.device_enabled("cuda"):
print("Skip because cuda is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
verify_rpc_cpu_export(obj_format)
verify_rpc_gpu_export(obj_format)
+@tvm.testing.uses_gpu
def test_remove_package_params():
def verify_cpu_remove_package_params(obj_format):
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def verify_gpu_remove_package_params(obj_format):
- if not tvm.runtime.enabled("cuda"):
+ if not tvm.testing.device_enabled("cuda"):
print("Skip because cuda is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def verify_rpc_cpu_remove_package_params(obj_format):
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def verify_rpc_gpu_remove_package_params(obj_format):
- if not tvm.runtime.enabled("cuda"):
+ if not tvm.testing.device_enabled("cuda"):
print("Skip because cuda is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
verify_rpc_gpu_remove_package_params(obj_format)
def test_debug_graph_runtime():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled")
return
mod, params = relay.testing.synthetic.get_workload()
import tvm
from tvm import te
+import tvm.testing
+
from tvm.contrib import util
header_file_dir_path = util.tempdir()
return csource_module
+@tvm.testing.uses_gpu
def test_mod_export():
def verify_gpu_mod_export(obj_format):
for device in ["llvm", "cuda"]:
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled..." % device)
return
def verify_multi_dso_mod_export(obj_format):
for device in ["llvm"]:
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled..." % device)
return
def verify_json_import_dso(obj_format):
for device in ["llvm"]:
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled..." % device)
return
print("Skip test because gcc is not available.")
for device in ["llvm"]:
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled..." % device)
return
import sys
import numpy as np
import subprocess
+import tvm.testing
runtime_py = """
import os
"""
def test_dso_module_load():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
dtype = 'int64'
temp = util.tempdir()
shell=True)
+@tvm.testing.requires_gpu
def test_device_module_dump():
# graph
n = tvm.runtime.convert(1024)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
temp = util.tempdir()
def check_stackvm(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
temp = util.tempdir()
def check_llvm():
ctx = tvm.cpu(0)
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled" )
return
temp = util.tempdir()
def check_system_lib():
ctx = tvm.cpu(0)
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
print("Skip because llvm is not enabled" )
return
temp = util.tempdir()
import tvm
from tvm import te
import numpy as np
-
-def enabled_ctx_list():
- ctx_list = [('cpu', tvm.cpu(0)),
- ('gpu', tvm.gpu(0)),
- ('cl', tvm.opencl(0)),
- ('metal', tvm.metal(0)),
- ('rocm', tvm.rocm(0)),
- ('vulkan', tvm.vulkan(0)),
- ('vpi', tvm.vpi(0))]
- for k, v in ctx_list:
- assert tvm.context(k, 0) == v
- ctx_list = [x[1] for x in ctx_list if x[1].exist]
- return ctx_list
-
-ENABLED_CTX_LIST = enabled_ctx_list()
-print("Testing using contexts:", ENABLED_CTX_LIST)
+import tvm.testing
+@tvm.testing.uses_gpu
def test_nd_create():
- for ctx in ENABLED_CTX_LIST:
+ for target, ctx in tvm.testing.enabled_targets():
for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32",
"float32"]:
x = np.random.randint(0, 10, size=(3, 4))
rev = remote.download("dat.bin")
assert(rev == blob)
+@tvm.testing.requires_llvm
def test_rpc_remote_module():
if not tvm.runtime.enabled("rpc"):
return
"rpc.Connect", server1.host, server1.port, "x1"])
def check_remote(remote):
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
temp = util.tempdir()
ctx = remote.cpu(0)
f = tvm.build(s, [A, B], "llvm", name="myadd")
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
def check_minrpc():
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
if tvm.get_global_func("rpc.PopenSession", allow_missing=True) is None:
return
# export to minrpc
runtime initializes. We leave it as an example
on how to do rpc when we want to do linking on remote.
"""
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled")
- return
- if not tvm.runtime.enabled("opencl"):
+ if not tvm.testing.device_enabled("opencl"):
print("Skip because opencl is not enabled")
return
temp = util.tempdir()
import tvm
from tvm import te
import ctypes
+import tvm.testing
+@tvm.testing.uses_gpu
def test_synthetic():
for device in ["llvm", "cuda"]:
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled..." % device)
return
tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
+@tvm.testing.uses_gpu
def test_cuda_lib():
ctx = tvm.gpu(0)
for device in ["llvm", "cuda"]:
- if not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because %s is not enabled..." % device)
return
nn = 12
if __name__ == "__main__":
test_synthetic()
- #test_system_lib()
+ test_cuda_lib()
import tvm
from tvm import te
import numpy as np
+import tvm.testing
+@tvm.testing.uses_gpu
def test_cmp_load_store():
n = 32
A = te.placeholder((n,), name='A')
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
+ if not tvm.testing.device_enabled("llvm"):
return
s = te.create_schedule(D.op)
xo, xi = s[C].split(C.op.axis[0], factor=4)
d.asnumpy(), np.logical_and(a.asnumpy() > b.asnumpy(), a.asnumpy() > 1).astype('float32'))
def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
return
+ ctx = tvm.context(device, 0)
s = te.create_schedule(D.op)
for stage in [C, D]:
xo, xi = s[stage].split(stage.op.axis[0], factor=4)
from tvm.contrib import util, cc
import numpy as np
+@tvm.testing.requires_llvm
def test_llvm_add_pipeline():
nn = 1024
n = tvm.runtime.convert(nn)
assert struct.unpack(endian + 'h', arr[0x12:0x14])[0] == e_machine
def build_i386():
- if not tvm.runtime.enabled("llvm"):
- print("Skip because llvm is not enabled..")
- return
temp = util.tempdir()
target = "llvm -mtriple=i386-pc-linux-gnu"
f = tvm.build(s, [A, B, C], target)
import unittest
from tvm.contrib.nvcc import have_fp16, have_int8
from tvm.contrib import nvcc
+import tvm.testing
tx = te.thread_axis("threadIdx.x")
bx = te.thread_axis("blockIdx.x")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_vectorize_add():
num_thread = 8
def check_cuda(dtype, n, lanes):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because gpu does not have fp16 support")
return
check_cuda("float16", 64, 6)
check_cuda("float16", 64, 8)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_multiply_add():
num_thread = 8
def check_cuda(dtype, n, lanes):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
if dtype == "int8" and not have_int8(tvm.gpu(0).compute_version):
print("skip because gpu does not support int8")
return
tvm.testing.assert_allclose(d.asnumpy(), np_d)
check_cuda("int8", 64, 4)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_vectorize_load():
num_thread = 8
def check_cuda(dtype, n, lanes):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
ctx = tvm.gpu(0)
A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
B = te.compute((n,), lambda i: A[i], name='B')
check_cuda("int8", 64, 8)
check_cuda("int8", 64, 16)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_make_int8():
def check_cuda(n, value, lanes):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
dtype = 'int8'
ctx = tvm.gpu(0)
A = te.compute((n, lanes), lambda i,j: tvm.tir.const(value, dtype=dtype))
check_cuda(64, -3, 2)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_inf_nan():
target = 'cuda'
def check_inf_nan(ctx, n, value, dtype):
# Only need to test compiling here
fun(a, c)
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
ctx = tvm.context(target, 0)
check_inf_nan(ctx, 1, -float('inf'), 'float32')
check_inf_nan(ctx, 1, float('nan'), 'float64')
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_shuffle():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
idxm = tvm.tir.indexmod
a = te.placeholder((64, ), 'int32')
b = te.placeholder((64, ), 'int32')
module(nda, ndb, ndc)
tvm.testing.assert_allclose(ndc.asnumpy(), ref)
-def test_crossthread_reduction1():
- def check(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist or not tvm.runtime.enabled(device):
- print("skip because", device, "is not enabled..")
- return
- n = te.var("n")
- m = te.var("m")
- A = te.placeholder((n, m), name='A')
- k = te.reduce_axis((0, m), "m")
- B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
-
- def sched(nthd):
- s = te.create_schedule(B.op)
- ko, _ = s[B].split(B.op.reduce_axis[0], nparts=nthd)
- s[B].bind(ko, te.thread_axis("threadIdx.x"))
- s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
- func = tvm.build(s, [A, B], device)
- return func
-
- def verify(nthd):
- func = sched(nthd)
- nn = 3
- # checks three typical cases
- vals = [nthd-1, nthd, nthd+1]
- for kk in [x for x in vals]:
- size = (nn, kk)
- a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
- b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
- func(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), \
- np.sum(a.asnumpy(), axis=1), rtol=1e-3)
-
- verify(16)
- verify(32)
- verify(64)
-
- check("cuda")
- check("rocm")
-
-
-def test_crossthread_reduction2():
- def check(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist or not tvm.runtime.enabled(device):
- print("skip because", device, "is not enabled..")
- return
-
- n = te.var("n")
- k0 = te.var("k0")
- k1 = te.var("k1")
- A = te.placeholder((n, k0, k1), name='A')
- k0 = te.reduce_axis((0, k0), "k0")
- k1 = te.reduce_axis((0, k1), "k1")
- B = te.compute((n,), lambda i: te.sum(A[i, k0, k1], axis=(k0, k1)), name="B")
-
- def sched(nthdx, nthdy):
- s = te.create_schedule(B.op)
- k0o, _ = s[B].split(B.op.reduce_axis[0], nparts=nthdx)
- k1o, _ = s[B].split(B.op.reduce_axis[1], nparts=nthdy)
- s[B].bind(k0o, te.thread_axis("threadIdx.x"))
- s[B].bind(k1o, te.thread_axis("threadIdx.y"))
- s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
- func = tvm.build(s, [A, B], device)
- return func
-
- def verify(nthdx, nthdy):
- func = sched(nthdx, nthdy)
- nn = 3
- # checks three typical cases
- vx = [nthdx-1, nthdx, nthdx+1]
- vy = [nthdy-1, nthdy, nthdy+1]
- for kk0, kk1 in [(x, y) for x in vx for y in vy]:
- size = (nn, kk0, kk1)
- a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
- b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
- func(a, b)
- tvm.testing.assert_allclose(b.asnumpy(), \
- np.sum(a.asnumpy(), axis=(1, 2)), rtol=1e-3)
-
- verify(16, 16)
- verify(32, 32)
- verify(16, 32)
- verify(32, 16)
-
- check("cuda")
- check("rocm")
+@tvm.testing.parametrize_targets("cuda", "rocm")
+def test_crossthread_reduction1(target, ctx):
+    """Row-wise sum whose reduction axis is split across ``threadIdx.x``.
+
+    Builds ``B[i] = sum_k A[i, k]`` for ``target`` and compares the result
+    on ``ctx`` with ``np.sum(..., axis=1)``.
+
+    NOTE(review): ``target`` and ``ctx`` are presumably injected by
+    ``parametrize_targets`` once per listed target -- confirm against
+    ``tvm.testing``.
+    """
+    n = te.var("n")
+    m = te.var("m")
+    A = te.placeholder((n, m), name='A')
+    k = te.reduce_axis((0, m), "m")
+    B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")
+
+    def sched(nthd):
+        """Build the kernel with the reduction axis split into ``nthd`` parts."""
+        s = te.create_schedule(B.op)
+        ko, _ = s[B].split(B.op.reduce_axis[0], nparts=nthd)
+        s[B].bind(ko, te.thread_axis("threadIdx.x"))
+        s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
+        return tvm.build(s, [A, B], target)
+
+    def verify(nthd):
+        """Run the kernel built for ``nthd`` and check it against numpy."""
+        func = sched(nthd)
+        nn = 3
+        # checks three typical cases: reduction length just below, equal to
+        # and just above the number of parts
+        for kk in (nthd - 1, nthd, nthd + 1):
+            size = (nn, kk)
+            a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
+            b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
+            func(a, b)
+            tvm.testing.assert_allclose(
+                b.asnumpy(), np.sum(a.asnumpy(), axis=1), rtol=1e-3)
+
+    verify(16)
+    verify(32)
+    verify(64)
+
+
+@tvm.testing.parametrize_targets("cuda", "rocm")
+def test_crossthread_reduction2(target, ctx):
+    """Sum over two reduction axes split across ``threadIdx.x``/``threadIdx.y``.
+
+    Builds ``B[i] = sum_{k0, k1} A[i, k0, k1]`` for ``target`` and compares
+    the result on ``ctx`` with ``np.sum(..., axis=(1, 2))``.
+
+    NOTE(review): ``target`` and ``ctx`` are presumably injected by
+    ``parametrize_targets`` once per listed target -- confirm against
+    ``tvm.testing``.
+    """
+    n = te.var("n")
+    k0 = te.var("k0")
+    k1 = te.var("k1")
+    A = te.placeholder((n, k0, k1), name='A')
+    # k0/k1 are rebound here: from the extent variables used in A's shape
+    # to reduce axes ranging over those extents.
+    k0 = te.reduce_axis((0, k0), "k0")
+    k1 = te.reduce_axis((0, k1), "k1")
+    B = te.compute((n,), lambda i: te.sum(A[i, k0, k1], axis=(k0, k1)), name="B")
+
+    def sched(nthdx, nthdy):
+        """Build the kernel with the two reduce axes split into nthdx/nthdy parts."""
+        s = te.create_schedule(B.op)
+        k0o, _ = s[B].split(B.op.reduce_axis[0], nparts=nthdx)
+        k1o, _ = s[B].split(B.op.reduce_axis[1], nparts=nthdy)
+        s[B].bind(k0o, te.thread_axis("threadIdx.x"))
+        s[B].bind(k1o, te.thread_axis("threadIdx.y"))
+        s[B].bind(B.op.axis[0], te.thread_axis("blockIdx.x"))
+        return tvm.build(s, [A, B], target)
+
+    def verify(nthdx, nthdy):
+        """Run the kernel built for (nthdx, nthdy) and check it against numpy."""
+        func = sched(nthdx, nthdy)
+        nn = 3
+        # checks three typical cases per axis: extent just below, equal to
+        # and just above the number of parts
+        vx = (nthdx - 1, nthdx, nthdx + 1)
+        vy = (nthdy - 1, nthdy, nthdy + 1)
+        for kk0 in vx:
+            for kk1 in vy:
+                size = (nn, kk0, kk1)
+                a = tvm.nd.array(np.random.uniform(size=size).astype(A.dtype), ctx)
+                b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
+                func(a, b)
+                tvm.testing.assert_allclose(
+                    b.asnumpy(), np.sum(a.asnumpy(), axis=(1, 2)), rtol=1e-3)
+
+    verify(16, 16)
+    verify(32, 32)
+    verify(16, 32)
+    verify(32, 16)
+
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_reduction_binding():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
k = te.reduce_axis((0, 32), 'k')
A = te.placeholder((96, 32), name='A')
B = te.compute( (96,), lambda m:
fcuda = tvm.build(s, [A, B], "cuda")
-def test_rfactor_predicates():
- def check(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist or not tvm.runtime.enabled(device):
- print("skip because", device, "is not enabled..")
- return
-
- n = te.reduce_axis((0, 129), 'n')
- A = te.placeholder((129,), name='A')
- B = te.compute( (1, ), lambda b:
- te.sum(A[n],
- axis=n),
- name='B'
- )
+@tvm.testing.parametrize_targets("cuda", "rocm")
+def test_rfactor_predicates(target, ctx):
+ """Build-only check: a 129-element sum scheduled with two rfactor steps and a store predicate."""
+ n = te.reduce_axis((0, 129), 'n')
+ A = te.placeholder((129,), name='A')
+ B = te.compute( (1, ), lambda b:
+ te.sum(A[n],
+ axis=n),
+ name='B'
+ )
- s = te.create_schedule(B.op)
-
- _, ni = s[B].split(s[B].op.reduce_axis[0], factor=8)
+ s = te.create_schedule(B.op)
- BF = s.rfactor(B, ni, 0)
- s[B].set_store_predicate(tx.var.equal(0))
+ _, ni = s[B].split(s[B].op.reduce_axis[0], factor=8)
- s[B].bind(s[B].op.reduce_axis[0], tx)
- s[B].bind(s[B].op.axis[0], bx)
+ BF = s.rfactor(B, ni, 0)
+ s[B].set_store_predicate(tx.var.equal(0))
- s[BF].compute_at(s[B], s[B].op.axis[0])
+ s[B].bind(s[B].op.reduce_axis[0], tx)
+ s[B].bind(s[B].op.axis[0], bx)
- _, noi = s[BF].split(s[BF].op.reduce_axis[0], factor=2)
+ s[BF].compute_at(s[B], s[B].op.axis[0])
- BF2 = s.rfactor(BF, noi, 0)
+ _, noi = s[BF].split(s[BF].op.reduce_axis[0], factor=2)
- s[BF].bind(s[BF].op.axis[0], tx)
- s[BF2].compute_at(s[BF], s[BF].op.axis[1])
+ BF2 = s.rfactor(BF, noi, 0)
- fcuda = tvm.build(s, [A, B], device)
+ s[BF].bind(s[BF].op.axis[0], tx)
+ s[BF2].compute_at(s[BF], s[BF].op.axis[1])
- check("cuda")
- check("rocm")
+ # Only the build is exercised; the module is not run and ctx is unused here.
+ fcuda = tvm.build(s, [A, B], target)
-@unittest.skipIf(not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"), "skip because cuda is not enabled..")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_const_float_to_half():
# This import is required to use nvcc to perform code gen;
# otherwise it is found that the code gen is done by nvrtc.
func(a, c)
np.testing.assert_equal(c.asnumpy(), a_np > b.value)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_reduction():
def check(device, dtype, m=32, n=32):
- ctx = tvm.context(device, 0)
- if not ctx.exist or not tvm.runtime.enabled(device):
- print("skip because", device, "is not enabled..")
+ if not tvm.testing.device_enabled(device):
+ print("Skipping", device)
return
- if dtype == "float16" and not have_fp16(ctx.compute_version):
- print("Skip because gpu does not have fp16 support")
- return
-
+ ctx = tvm.context(device, 0)
a = te.placeholder((m, n), name="a", dtype=dtype)
b = te.placeholder((m, n), name="b", dtype=dtype)
c = a + b
check("rocm", "float32")
check("cuda", "float16")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_mix_threaded_and_normal_reduction():
def check(device, dtype, m=32, n=32):
- ctx = tvm.context(device, 0)
- if not ctx.exist or not tvm.runtime.enabled(device):
- print("skip because", device, "is not enabled..")
+ if not tvm.testing.device_enabled(device):
+ print("Skipping", device)
return
+ ctx = tvm.context(device, 0)
if dtype == "float16" and not have_fp16(ctx.compute_version):
print("Skip because gpu does not have fp16 support")
return
check("rocm", "float32")
check("cuda", "float16")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_floordiv_with_vectorization():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
with tvm.target.cuda():
# B[i] = A[floordiv(i, k)]
n = 256
func(a_nd, b_nd)
tvm.testing.assert_allclose(b_nd.asnumpy(), b_np, rtol=1e-3)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_floormod_with_vectorization():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
with tvm.target.cuda():
# B[i] = A[floormod(i, k)]
n = 256
func(a_nd, b_nd)
tvm.testing.assert_allclose(b_nd.asnumpy(), b_np, rtol=1e-3)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_vectorized_casts():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
def check(t0, t1):
if (t0 == "float16" or t1 == "float16") and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because gpu does not have fp16 support")
s[B].bind(iio, tx)
return s
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_vectorized_intrin1():
test_funcs = [
(tvm.tir.floor, lambda x : np.floor(x)),
(tvm.tir.sqrt, lambda x : np.sqrt(x)),
]
def run_test(tvm_intrin, np_func, dtype):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because gpu does not have fp16 support")
return
run_test(*func, "float32")
run_test(*func, "float16")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_vectorized_intrin2(dtype="float32"):
c2 = tvm.tir.const(2, dtype=dtype)
test_funcs = [
(tvm.tir.fmod, lambda x : np.fmod(x, 2.0))
]
def run_test(tvm_intrin, np_func):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
n = 128
A = te.placeholder((n,), dtype=dtype, name='A')
B = te.compute((n,), lambda i: tvm_intrin(A[i], c2), name='B')
for func in test_funcs:
run_test(*func)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_vectorized_popcount():
def ref_popcount(x):
cnt = 0
return cnt
def run_test(dtype):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
n = 128
A = te.placeholder((n,), dtype=dtype, name='A')
B = te.compute((n,), lambda i: tvm.tir.popcount(A[i]), name='B')
run_test("uint32")
run_test("uint64")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_cuda_vectorize_load_permute_pad():
def check_cuda(dtype, n, l, padding, lanes):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because gpu does not have fp16 support")
return
tvm.tir.stmt_functor.ir_transform(stmt['main'].body, pre_visit, post_visit)
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("CUDA device not found, skip the verification.")
- return
- else:
- tgt = tvm.target.cuda()
- mod = tvm.build(s, args, tgt)
- # To check if every vectorize loop transforms to correct instruction
- # print(mod.imported_modules[0].get_source())
-
- ctx = tvm.context("cuda", 0)
- a = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
- b = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
- c = tvm.nd.array(np.zeros((512, 512), dtype="float32"), ctx)
- mod(a, b, c)
- tvm.testing.assert_allclose(c.asnumpy(), np.dot(
- a.asnumpy(), b.asnumpy()), rtol=1e-5)
-
+ tgt = tvm.target.cuda()
+ mod = tvm.build(s, args, tgt)
+ # To check if every vectorize loop transforms to correct instruction
+ # print(mod.imported_modules[0].get_source())
+
+ ctx = tvm.context("cuda", 0)
+ a = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
+ b = tvm.nd.array(np.random.uniform(size=(512, 512)).astype("float32"), ctx)
+ c = tvm.nd.array(np.zeros((512, 512), dtype="float32"), ctx)
+ mod(a, b, c)
+ tvm.testing.assert_allclose(c.asnumpy(), np.dot(
+ a.asnumpy(), b.asnumpy()), rtol=1e-5)
+
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_vectorized_cooperative_fetching_x():
N = 512
A = te.placeholder((N, N), name='A', dtype='float32')
vcf_check_common(s, [A, B, C])
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_vectorized_cooperative_fetching_xy():
N = 512
A = te.placeholder((N, N), name='A')
vcf_check_common(s, [A, B, C])
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_unrolled_vectorization():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
-
dtype = 'float32'
target = 'cuda'
from tvm import te
from tvm.contrib import util
import numpy as np
+import tvm.testing
+@tvm.testing.requires_gpu
def test_large_uint_imm():
value = (1 << 63) + 123
other = tvm.tir.const(3, "uint64")
s[A].bind(xo, te.thread_axis("blockIdx.x"))
def check_target(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
return
+ ctx = tvm.context(device, 0)
f = tvm.build(s, [A], device)
# launch the kernel.
a = tvm.nd.empty((n, ), dtype=A.dtype, ctx=ctx)
check_target("vulkan")
+@tvm.testing.requires_gpu
def test_add_pipeline():
n = te.size_var('n')
A = te.placeholder((n,), name='A')
s[D].bind(xo, te.thread_axis("blockIdx.x"))
def check_target(device, host="stackvm"):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- return
- if not tvm.runtime.enabled(host):
+ if not tvm.testing.device_enabled(device) or not tvm.testing.device_enabled(host):
return
+ ctx = tvm.context(device, 0)
mhost = tvm.driver.build(s, [A, B, D], target=device, target_host=host)
f = mhost.entry_func
# launch the kernel.
import tvm
from tvm import te
import numpy as np
+import tvm.testing
+@tvm.testing.uses_gpu
def test_add_pipeline():
nn = 64
max_threads = 4
print(tvm.lower(s_gpu, [A, C_gpu], simple_mode=True))
def check_target(target):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
return
s = s_gpu if target in ['opencl', 'cuda'] else s_cpu
C = C_gpu if target in ['opencl', 'cuda'] else C_cpu
def check_target(target):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
return
# build and invoke the kernel.
f = tvm.build(s, [A, C], target)
s = te.create_schedule(C.op)
def check_target(target):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
return
# build and invoke the kernel.
f = tvm.build(s, [A, C], target)
import re
+@tvm.testing.requires_llvm
def test_llvm_intrin():
ib = tvm.tir.ir_builder.create()
n = tvm.runtime.convert(4)
fcode = tvm.build(mod, None, "llvm")
+@tvm.testing.requires_llvm
def test_llvm_void_intrin():
ib = tvm.tir.ir_builder.create()
A = ib.pointer("uint8", name="A")
fcode = tvm.build(mod, None, "llvm")
+@tvm.testing.requires_llvm
def test_llvm_overloaded_intrin():
# Name lookup for overloaded intrinsics in LLVM 4- requires a name
# that includes the overloaded types.
f = tvm.build(s, [A, C], target = 'llvm')
+@tvm.testing.requires_llvm
def test_llvm_import():
# extern "C" is necessary to get the correct signature
cc_code = """
tvm.tir.call_pure_extern("float32", "my_add", A(*i), 1.0),
name='B')
def check_llvm(use_file):
- if not tvm.runtime.enabled("llvm"):
- return
if not clang.find_clang(required=False):
print("skip because clang is not available")
return
+@tvm.testing.requires_llvm
def test_llvm_lookup_intrin():
ib = tvm.tir.ir_builder.create()
A = ib.pointer("uint8x8", name="A")
fcode = tvm.build(mod, None, "llvm")
+@tvm.testing.requires_llvm
def test_llvm_large_uintimm():
value = (1 << 63) + 123
other = tvm.tir.const(3, "uint64")
s = te.create_schedule(A.op)
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
- return
f = tvm.build(s, [A], "llvm")
ctx = tvm.cpu(0)
# launch the kernel.
check_llvm()
+@tvm.testing.requires_llvm
def test_llvm_add_pipeline():
nn = 1024
n = tvm.runtime.convert(nn)
s[C].vectorize(xi)
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
- return
# Specifically allow offset to test codepath when offset is available
Ab = tvm.tir.decl_buffer(
A.shape, A.dtype,
check_llvm()
+@tvm.testing.requires_llvm
def test_llvm_persist_parallel():
n = 128
A = te.placeholder((n,), name='A')
s[C].pragma(xi, "parallel_stride_pattern")
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
- return
# BUILD and invoke the kernel.
f = tvm.build(s, [A, C], "llvm")
ctx = tvm.cpu(0)
check_llvm()
+@tvm.testing.requires_llvm
def test_llvm_flip_pipeline():
def check_llvm(nn, base):
- if not tvm.runtime.enabled("llvm"):
- return
n = tvm.runtime.convert(nn)
A = te.placeholder((n + base), name='A')
C = te.compute((n,), lambda i: A(nn + base- i - 1), name='C')
check_llvm(128, 1)
+@tvm.testing.requires_llvm
def test_llvm_vadd_pipeline():
def check_llvm(n, lanes):
- if not tvm.runtime.enabled("llvm"):
- return
A = te.placeholder((n,), name='A', dtype="float32x%d" % lanes)
B = te.compute((n,), lambda i: A[i], name='B')
C = te.compute((n,), lambda i: B[i] + tvm.tir.const(1, A.dtype), name='C')
check_llvm(512, 2)
+@tvm.testing.requires_llvm
def test_llvm_madd_pipeline():
def check_llvm(nn, base, stride):
- if not tvm.runtime.enabled("llvm"):
- return
n = tvm.runtime.convert(nn)
A = te.placeholder((n + base, stride), name='A')
C = te.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C')
check_llvm(4, 0, 3)
+@tvm.testing.requires_llvm
def test_llvm_temp_space():
nn = 1024
n = tvm.runtime.convert(nn)
s = te.create_schedule(C.op)
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
- return
# build and invoke the kernel.
f = tvm.build(s, [A, C], "llvm")
ctx = tvm.cpu(0)
c.asnumpy(), a.asnumpy() + 1 + 1)
check_llvm()
+@tvm.testing.requires_llvm
def test_multiple_func():
nn = 1024
n = tvm.runtime.convert(nn)
s[C].parallel(xo)
s[C].vectorize(xi)
def check_llvm():
- if not tvm.runtime.enabled("llvm"):
- return
# build two functions
f2 = tvm.lower(s, [A, B, C], name="fadd1")
f1 = tvm.lower(s, [A, B, C], name="fadd2")
+@tvm.testing.requires_llvm
def test_llvm_condition():
def check_llvm(n, offset):
- if not tvm.runtime.enabled("llvm"):
- return
A = te.placeholder((n, ), name='A')
C = te.compute((n,), lambda i: tvm.tir.if_then_else(i >= offset, A[i], 0.0), name='C')
s = te.create_schedule(C.op)
check_llvm(64, 8)
+@tvm.testing.requires_llvm
def test_llvm_bool():
def check_llvm(n):
- if not tvm.runtime.enabled("llvm"):
- return
A = te.placeholder((n, ), name='A', dtype="int32")
C = te.compute((n,), lambda i: A[i].equal(1).astype("float"), name='C')
s = te.create_schedule(C.op)
check_llvm(64)
+@tvm.testing.requires_llvm
def test_rank_zero():
def check_llvm(n):
- if not tvm.runtime.enabled("llvm"):
- return
A = te.placeholder((n, ), name='A')
scale = te.placeholder((), name='scale')
k = te.reduce_axis((0, n), name="k")
tvm.testing.assert_allclose(d.asnumpy(), d_np)
check_llvm(64)
+@tvm.testing.requires_llvm
def test_rank_zero_bound_checkers():
def check_llvm(n):
- if not tvm.runtime.enabled("llvm"):
- return
with tvm.transform.PassContext(config={"tir.instrument_bound_checkers": True}):
A = te.placeholder((n, ), name='A')
scale = te.placeholder((), name='scale')
check_llvm(64)
+@tvm.testing.requires_llvm
def test_alignment():
n = tvm.runtime.convert(1024)
A = te.placeholder((n,), name='A')
assert has_call_to_assume()
+@tvm.testing.requires_llvm
def test_llvm_div():
"""Check that the semantics of div and mod is correct"""
def check(start, end, dstart, dend, dtype, floor_div=False):
check(0, 255, dstart, dend, 'uint8', floor_div=False)
check(0, 255, dstart, dend, 'uint8', floor_div=True)
+@tvm.testing.requires_llvm
def test_llvm_fp_math():
def check_llvm_reciprocal(n):
A = te.placeholder((n,), name='A')
check_llvm_sigmoid(16)
+@tvm.testing.requires_llvm
def test_dwarf_debug_information():
nn = 1024
n = tvm.runtime.convert(nn)
s[C].parallel(xo)
s[C].vectorize(xi)
def check_llvm_object():
- if not tvm.runtime.enabled("llvm"):
- return
if tvm.target.codegen.llvm_version_major() < 5:
return
if tvm.target.codegen.llvm_version_major() > 6:
assert re.search(r"""DW_AT_name.*fadd2""", str(output))
def check_llvm_ir():
- if not tvm.runtime.enabled("llvm"):
- return
if tvm.target.codegen.llvm_version_major() < 5:
return
if tvm.target.codegen.llvm_version_major() > 6:
check_llvm_ir()
+@tvm.testing.requires_llvm
def test_llvm_shuffle():
a = te.placeholder((8, ), 'int32')
b = te.placeholder((8, ), 'int32')
''' Convert a numpy array of float to bf16 and cast back'''
return np_bf162np_float(np_float2np_bf16(arr))
+@tvm.testing.requires_llvm
def test_llvm_bf16():
def dotest(do_vectorize):
np.random.seed(122)
dotest(True)
dotest(False)
+@tvm.testing.requires_llvm
def test_llvm_crt_static_lib():
A = te.placeholder((32, ), dtype='bfloat16')
B = te.placeholder((32, ), dtype='bfloat16')
# under the License.
import tvm
from tvm import te
+import tvm.testing
target = 'opencl'
+@tvm.testing.requires_gpu
+@tvm.testing.requires_opencl
def test_opencl_ternary_expression():
def check_if_then_else(ctx, n, dtype):
A = te.placeholder((n,), name='A', dtype=dtype)
# Only need to test compiling here
fun(a, c)
- if not tvm.runtime.enabled(target):
- print("skip because opencl is not enabled..")
- return
-
ctx = tvm.context(target, 0)
check_if_then_else(ctx, 1, 'int8')
check_select(ctx, 1, 'int16')
check_select(ctx, 1, 'uint16')
+@tvm.testing.requires_gpu
+@tvm.testing.requires_opencl
def test_opencl_inf_nan():
def check_inf_nan(ctx, n, value, dtype):
A = te.placeholder((n,), name='A', dtype=dtype)
# Only need to test compiling here
fun(a, c)
- if not tvm.runtime.enabled(target):
- print("skip because opencl is not enabled..")
- return
-
ctx = tvm.context(target, 0)
check_inf_nan(ctx, 1, -float('inf'), 'float32')
check_inf_nan(ctx, 1, float('nan'), 'float64')
+@tvm.testing.requires_gpu
+@tvm.testing.requires_opencl
def test_opencl_max():
def check_max(ctx, n, dtype):
A = te.placeholder((n,), name='A', dtype=dtype)
# Only need to test compiling here
fun(a, c)
- if not tvm.runtime.enabled(target):
- print("skip because opencl is not enabled..")
- return
-
ctx = tvm.context(target, 0)
check_max(ctx, 1, 'int8')
bx = te.thread_axis("blockIdx.x")
by = te.thread_axis("blockIdx.y")
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
def test_rocm_cross_thread_reduction():
# based on the reduction tutorial
n = te.size_var("n")
b.asnumpy(), np.sum(a.asnumpy(), axis=1), rtol=1e-4)
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
def test_rocm_inf_nan():
def check_inf_nan(ctx, n, value, dtype):
A = te.placeholder((n,), name='A', dtype=dtype)
check_inf_nan(ctx, 1, float('nan'), 'float32')
check_inf_nan(ctx, 1, float('nan'), 'float64')
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
def test_rocm_reduction_binding():
k = te.reduce_axis((0, 32), 'k')
A = te.placeholder((96, 32), name='A')
mo, _ = s[B].split(B.op.axis[0], 32)
s[B].bind(mo, bx)
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
def test_rocm_copy():
def check_rocm(dtype, n):
peturb = np.random.uniform(low=0.5, high=1.5)
check_rocm(dtype, int(peturb * (2 ** logN)))
-@unittest.skipIf(not tvm.rocm(0).exist or not tvm.runtime.enabled("rocm"), "skip because rocm is not enabled..")
+@tvm.testing.requires_rocm
def test_rocm_vectorize_add():
num_thread = 8
# specific language governing permissions and limitations
# under the License.
import tvm
+import tvm.testing
from tvm import te
import numpy as np
def run_jit(fapi, check):
for target in ["llvm", "stackvm"]:
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
continue
f = tvm.driver.build(fapi, target=target)
s = f.get_source()
import numpy as np
+@tvm.testing.requires_vulkan
def test_vector_comparison():
- if not tvm.runtime.enabled("vulkan"):
- print("Skipping due to no Vulkan module")
- return
-
target = 'vulkan'
def check_correct_assembly(dtype):
bx = te.thread_axis("blockIdx.x")
+@tvm.testing.requires_vulkan
def test_vulkan_copy():
def check_vulkan(dtype, n):
- if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
- print("skip because vulkan is not enabled..")
- return
A = te.placeholder((n,), name='A', dtype=dtype)
ctx = tvm.vulkan(0)
a_np = np.random.uniform(size=(n,)).astype(A.dtype)
check_vulkan(dtype, int(peturb * (2 ** logN)))
+@tvm.testing.requires_vulkan
def test_vulkan_vectorize_add():
num_thread = 8
def check_vulkan(dtype, n, lanes):
- if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
- print("skip because vulkan is not enabled..")
- return
A = te.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
B = te.compute((n,), lambda i: A[i]+tvm.tir.const(1, A.dtype), name='B')
s = te.create_schedule(B.op)
check_vulkan("float16", 64, 2)
+@tvm.testing.requires_vulkan
def test_vulkan_stress():
"""
Launch a randomized test with multiple kernels per stream, multiple uses of
def run_stress():
def worker():
- if not tvm.vulkan(0).exist or not tvm.runtime.enabled("vulkan"):
- print("skip because vulkan is not enabled..")
- return
A = te.placeholder((n,), name='A', dtype="float32")
B = te.placeholder((n,), name='B', dtype="float32")
functions = [
import tvm
from tvm import te
-from tvm.testing import check_numerical_grads, assert_allclose
+from tvm.testing import assert_allclose
from tvm import topi
from tvm.topi.util import get_const_tuple
import pytest
def check_device(device, host="llvm"):
ctx = tvm.context(device, 0)
- if not tvm.runtime.enabled(host):
- return
- if not ctx.exist:
- print("skip because %s is not enabled.." % device)
+ if not tvm.testing.device_enabled(host):
return
sout = te.create_schedule(out.op)
out_data = tvm.nd.empty(out_shape, out.dtype)
mout(out_data, *[tvm.nd.array(d) for d in list(in_data)])
return out_data.asnumpy().sum()
- check_numerical_grads(forward, [d.asnumpy() for d in input_data + arg_vals], g_res)
+ tvm.testing.check_numerical_grads(forward, [d.asnumpy() for d in input_data + arg_vals], g_res)
check_device("cpu")
from tvm.te.hybrid import script
from tvm.te.hybrid.runtime import HYBRID_GLOBALS
+import tvm.testing
+
@pytest.mark.skip
def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None):
def tvm_val_2_py_val(val):
run_and_check(func, ins, outs=outs)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_bind():
- if not tvm.gpu(0).exist:
- print('[Warning] No GPU found! Skip bind test!')
- return
-
@script
def vec_add(a, b):
c = output_tensor((1000, ), 'float32')
func, ins, outs = run_and_check(triangle, [a, b])
run_and_check(func, ins, outs=outs)
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_allocate():
@te.hybrid.script
def blur2d(a):
func, ins, outs = run_and_check(blur2d, [a])
run_and_check(func, ins, outs=outs)
- if tvm.gpu().exist:
- @te.hybrid.script
- def share_vec_add(a, b):
- c = output_tensor((256, ), 'float32')
- shared = allocate((256, ), 'float32', 'shared')
- for i in bind("threadIdx.x", 256):
- shared[i] = a[i]
- local = allocate((256, ), 'float32', 'local')
- for i in bind("threadIdx.x", 256):
- local[i] = b[i]
- for i in bind("threadIdx.x", 256):
- c[i] = shared[i] + local[i]
- return c
-
- a = te.placeholder((256, ), dtype='float32', name='a')
- b = te.placeholder((256, ), dtype='float32', name='b')
- c = share_vec_add(a, b)
- func, ins, outs = run_and_check(share_vec_add, [a, b], target='cuda')
- run_and_check(func, ins, outs=outs, target='cuda')
- else:
- print('[Warning] No GPU found! Skip shared mem test!')
+ @te.hybrid.script
+ def share_vec_add(a, b):
+ c = output_tensor((256, ), 'float32')
+ shared = allocate((256, ), 'float32', 'shared')
+ for i in bind("threadIdx.x", 256):
+ shared[i] = a[i]
+ local = allocate((256, ), 'float32', 'local')
+ for i in bind("threadIdx.x", 256):
+ local[i] = b[i]
+ for i in bind("threadIdx.x", 256):
+ c[i] = shared[i] + local[i]
+ return c
+
+ a = te.placeholder((256, ), dtype='float32', name='a')
+ b = te.placeholder((256, ), dtype='float32', name='b')
+ c = share_vec_add(a, b)
+ func, ins, outs = run_and_check(share_vec_add, [a, b], target='cuda')
+ run_and_check(func, ins, outs=outs, target='cuda')
def test_upstream():
@te.hybrid.script
from tvm import te
from tvm import topi
import numpy as np
-from tvm.contrib import nvcc
+import tvm.testing
def tensor_core_matmul(warp_tile_m=16, m=64, n=32, l=96):
A = te.placeholder((n, l), name='A', dtype='float16')
c_np[bs, :, :] = np.dot(a_np[bs, :, :], b_np[bs, :, :])
np.testing.assert_allclose(c_np, c.asnumpy(), rtol=1e-3)
+@tvm.testing.requires_tensorcore
def test_tensor_core_matmul():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
- if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
- print("skip because gpu does not support tensor core")
- return
-
tensor_core_matmul(16) #test with warp_tile 16x16x16
tensor_core_matmul(8) #test with warp_tile 8x32x16
tensor_core_matmul(32) #test with warp_tile 32x8x16
+@tvm.testing.requires_tensorcore
def test_tensor_core_batch_matmul():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
- if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
- print("skip because gpu does not support tensor core")
- return
-
tensor_core_batch_matmul()
if __name__ == '__main__':
from tvm import te
import numpy as np
from tvm.topi.testing import conv2d_nhwc_python
-from tvm.contrib import nvcc
+import tvm.testing
VERIFY = True
return te.decl_tensor_intrin(C.op, intrin_func, binds={A: BA, C: BC})
+@tvm.testing.requires_tensorcore
def test_tensor_core_batch_matmal():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
- if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
- print("skip because gpu does not support tensor core")
- return
-
batch_size = 4
n = 512
m, l = n, n
+@tvm.testing.requires_tensorcore
def test_tensor_core_batch_conv():
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
- if not nvcc.have_tensorcore(tvm.gpu(0).compute_version):
- print("skip because gpu does not support tensor core")
- return
-
# The sizes of inputs and filters
batch_size = 32
height = 14
from tvm import topi
import tvm.topi.testing
from tvm.topi.util import get_const_tuple
+import tvm.testing
def test_operator_type_and_tags():
raise NotImplementedError()
def check_device(device):
- ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
+ ctx = tvm.context(device, 0)
print("Running on target: %s" % device)
with tvm.target.create(device):
s = tvm.topi.testing.get_elemwise_schedule(device)(B)
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def verify_conv2d_scalar_bop(batch, in_size, in_channel, num_filter, kernel, stride, padding, typ="add"):
def check_device(device):
ctx = tvm.context(device, 0)
- if not ctx.exist:
+ if not tvm.testing.device_enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
check_device(device)
+@tvm.testing.uses_gpu
def test_tensor_scalar_bop():
verify_tensor_scalar_bop((1,), typ="add")
verify_tensor_scalar_bop((3, 5), typ="sub")
verify_tensor_scalar_bop((2, 3, 1, 32), typ="div")
+@tvm.testing.uses_gpu
def test_broadcast_bop():
verify_broadcast_bop((2, 3), (), typ="add")
verify_broadcast_bop((5, 2, 3), (1,), typ="add")
verify_broadcast_bop((2, 3, 1, 32), (64, 32), typ="div")
+@tvm.testing.uses_gpu
def test_conv2d_scalar_bop():
verify_conv2d_scalar_bop(1, 16, 4, 4, 3, 1, 1, typ="add")
verify_conv2d_scalar_bop(1, 32, 2, 1, 3, 1, 1, typ="sub")
import numpy as np
import tvm
from tvm import te
-from tvm.testing import check_numerical_grads
+import tvm.testing
def test_check_numerical_grads():
# Functions and their derivatives
func_forw = lambda x: np.sum(func(x)[0])
grads = [func(x_input)[1]]
- check_numerical_grads(func_forw, [x_input], grads)
+ tvm.testing.check_numerical_grads(func_forw, [x_input], grads)
# Check functions with multiple arguments
for f1 in functions:
func_forw = lambda x, y: np.sum(f1(x)[0] + f2(y)[0])
grads = [f1(x_input)[1], f2(y_input)[1]]
- check_numerical_grads(func_forw, [x_input, y_input], grads)
+ tvm.testing.check_numerical_grads(func_forw, [x_input, y_input], grads)
# Same thing but with keyword arguments
func_forw = lambda x, y: np.sum(f1(x)[0] + f2(y)[0])
grads = {'x': f1(x_input)[1], 'y': f2(y_input)[1]}
- check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
+ tvm.testing.check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
def _noise1(x, atol=1e-2, rtol=0.1):
# We go in random direction using twice the original tolerance to be sure this
grads = [_noise1(f1(x_input)[1]), _noise1(f2(y_input)[1])]
try:
- check_numerical_grads(func_forw, [x_input, y_input], grads)
+ tvm.testing.check_numerical_grads(func_forw, [x_input, y_input], grads)
except AssertionError as e:
pass
else:
- raise AssertionError("check_numerical_grads didn't raise an exception")
+ raise AssertionError("tvm.testing.check_numerical_grads didn't raise an exception")
func_forw = lambda x, y: np.sum(f1(x)[0] + f2(y)[0])
grads = {'x': _noise2(f1(x_input)[1]), 'y': _noise2(f2(y_input)[1])}
try:
- check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
+ tvm.testing.check_numerical_grads(func_forw, {'x': x_input, 'y': y_input}, grads)
except AssertionError as e:
pass
else:
- raise AssertionError("check_numerical_grads didn't raise an exception")
+ raise AssertionError("tvm.testing.check_numerical_grads didn't raise an exception")
if __name__ == "__main__":
- test_check_numerical_grads()
+ test_check_numerical_grads()
"""Test gpu code verifier"""
import tvm
from tvm import te
+import tvm.testing
def get_verify_pass(valid, **kwargs):
def _fverify(f, *_):
return tvm.tir.transform.prim_func_pass(_fverify, opt_level=0)
+@tvm.testing.requires_gpu
def test_shared_memory():
def check_shared_memory(dtype):
N = 1024
# thread usage: M
for target in ['opencl', 'cuda']:
- if not tvm.context(target).exist:
+ if not tvm.testing.device_enabled(target):
continue
valid = [None]
with tvm.transform.PassContext(config={"tir.add_lower_pass": [
check_shared_memory('float32')
check_shared_memory('int8x4')
+@tvm.testing.requires_gpu
def test_local_memory():
N = 1024
M = 128
# thread usage: M
for target in ['opencl', 'cuda']:
- if not tvm.context(target).exist:
+ if not tvm.testing.device_enabled(target):
continue
valid = [None]
tvm.build(s, [A, B], target)
assert valid[0]
+@tvm.testing.requires_gpu
def test_num_thread():
N = 1024
M = 128
# thread usage: N
for target in ['opencl', 'cuda']:
- if not tvm.context(target).exist:
+ if not tvm.testing.device_enabled(target):
continue
valid = [None]
tvm.build(s, [A, B], target)
assert valid[0]
+@tvm.testing.requires_gpu
def test_multiple_kernels():
N = 1024
# thread usage: N
for target in ['opencl', 'cuda']:
- if not tvm.context(target).exist:
+ if not tvm.testing.device_enabled(target):
continue
valid = [None]
tvm.build(s, [A, C], target)
assert valid[0]
+@tvm.testing.requires_gpu
def test_wrong_bind():
N = 1024
s[B].bind(s[B].op.axis[1], te.thread_axis("threadIdx.x"))
for target in ['opencl', 'cuda']:
- if not tvm.context(target).exist:
+ if not tvm.testing.device_enabled(target):
continue
valid = [None]
tvm.build(s, [A, B], target)
assert not valid[0]
+@tvm.testing.requires_gpu
def test_vectorize():
N = 1024
s[B].vectorize(ji)
for target in ['opencl', 'cuda']:
- if not tvm.context(target).exist:
+ if not tvm.testing.device_enabled(target):
continue
valid = [None]
tvm.lower(s, [A, B])
assert not valid[0]
+@tvm.testing.requires_gpu
def test_vthread():
N = 1024
s[B].bind(s[B].op.axis[1], te.thread_axis("vthread"))
for target in ['opencl', 'cuda']:
- if not tvm.context(target).exist:
+ if not tvm.testing.device_enabled(target):
continue
valid = [None]
import tvm
import pytest
from tvm import te
+import tvm.testing
# The following DLDeviceType/TVMDeviceExtType values
# are originally defined in dlpack.h and c_runtime_api.h.
# All computations are bound.
# So VerifyMemory pass is expected to succeed.
#
+@tvm.testing.uses_gpu
def test_verify_memory_all_bind():
n = te.var("n")
A = te.placeholder((n,), name='A')
mod = tvm.lower(s, [A, B])
for dev_type in gpu_devices + other_devices:
- binded_mod = tvm.tir.transform.Apply(
- lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
- tvm.tir.transform.VerifyMemory()(binded_mod)
+ if tvm.testing.device_enabled(dev_type):
+ binded_mod = tvm.tir.transform.Apply(
+ lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+ tvm.tir.transform.VerifyMemory()(binded_mod)
# Computations are not bound.
# So VerifyMemory pass fails when device type is GPU.
#
+@tvm.testing.uses_gpu
def test_verify_memory_not_bind():
n = te.var("n")
A = te.placeholder((n,), name='A')
mod = tvm.lower(s, [A, B])
for dev_type in gpu_devices:
- binded_mod = tvm.tir.transform.Apply(
- lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
- with pytest.raises(RuntimeError):
- tvm.tir.transform.VerifyMemory()(binded_mod)
+ if tvm.testing.device_enabled(dev_type):
+ binded_mod = tvm.tir.transform.Apply(
+ lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+ with pytest.raises(RuntimeError):
+ tvm.tir.transform.VerifyMemory()(binded_mod)
for dev_type in other_devices:
- binded_mod = tvm.tir.transform.Apply(
- lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
- tvm.tir.transform.VerifyMemory()(binded_mod)
+ if tvm.testing.device_enabled(dev_type):
+ binded_mod = tvm.tir.transform.Apply(
+ lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+ tvm.tir.transform.VerifyMemory()(binded_mod)
# Computations are partially bound.
# So VerifyMemory pass fails when device type is GPU.
#
+@tvm.testing.uses_gpu
def test_verify_memory_partially_bind():
n = te.var("n")
A = te.placeholder((n,), name='A')
mod = tvm. lower(s, [A, B, C, D])
for dev_type in gpu_devices:
- binded_mod = tvm.tir.transform.Apply(
- lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
- with pytest.raises(RuntimeError):
- tvm.tir.transform.VerifyMemory()(binded_mod)
+ if tvm.testing.device_enabled(dev_type):
+ binded_mod = tvm.tir.transform.Apply(
+ lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+ with pytest.raises(RuntimeError):
+ tvm.tir.transform.VerifyMemory()(binded_mod)
for dev_type in other_devices:
- binded_mod = tvm.tir.transform.Apply(
- lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
- tvm.tir.transform.VerifyMemory()(binded_mod)
+ if tvm.testing.device_enabled(dev_type):
+ binded_mod = tvm.tir.transform.Apply(
+ lambda f: f.with_attr("target", tvm.target.create(dev_type)))(mod)
+ tvm.tir.transform.VerifyMemory()(binded_mod)
assert_simplified_equal(index_simplified, index_direct)
+@tvm.testing.requires_llvm
def test_buffer_broadcast():
m0, m1, m2 = te.size_var("m0"), te.size_var("m1"), te.size_var("m2")
n0, n1, n2 = te.size_var("n0"), te.size_var("n1"), te.size_var("n2")
s = te.create_schedule(C.op)
def check():
- if not tvm.runtime.enabled("llvm"):
- return
fadd = tvm.build(s, [A, B, C], target='llvm', name='bcast_add', binds={A:Ab, B:Bb})
ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=(2, 4, 3)).astype(A.dtype), ctx)
check()
+@tvm.testing.requires_llvm
def test_buffer_broadcast_expr():
n0, m0, x = te.size_var('n0'), te.size_var('m0'), te.size_var('x')
n1, m1 = te.size_var('n1'), te.size_var('m1')
s = te.create_schedule(C.op)
def check_stride():
- if not tvm.runtime.enabled("llvm"):
- return
fadd = tvm.build(s, [A, B, C, o1, x], target='llvm', name='bcast_add',
binds={A:Ab, B:Bb, C:Cc})
ctx = tvm.cpu(0)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
def check_no_stride():
- if not tvm.runtime.enabled("llvm"):
- return
fadd = tvm.build(s, [A, B, C, o1, x], target='llvm', name='bcast_add',
binds={A: Ab, B: Bb, C: Cc})
ctx = tvm.cpu(0)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
def check_auto_bind():
- if not tvm.runtime.enabled("llvm"):
- return
# Let build bind buffers
fadd = tvm.build(s, [A, B, C, o1, x], target='llvm', name='bcast_add')
ctx = tvm.cpu(0)
import tvm
from tvm import te
import numpy as np
+import tvm.testing
def test_for():
ib = tvm.tir.ir_builder.create()
name="vector_add", dtype=dtype)
s = te.create_schedule(C.op)
def check_target(target):
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
return
# build and invoke the kernel.
fadd = tvm.build(s, [A, B, C], target)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
check_target("llvm")
+@tvm.testing.requires_gpu
def test_gpu():
n = te.size_var('n')
dtype = "float32"
stmt = tvm.te.schedule.ScheduleOps(s, bounds)
def check_target(target):
n = 1024
- if not tvm.runtime.enabled(target):
+ if not tvm.testing.device_enabled(target):
return
# build and invoke the kernel.
fadd = tvm.build(s, [A, B, C], target)
from tvm import relay
import numpy as np
import pytest
-from tvm.relay.testing import ctx_list
+from tvm.testing import enabled_targets
var_list = []
kernel = np.random.uniform(-scale, scale, size=kshape).astype(dtype)
params = {'w': tvm.nd.array(kernel)}
- for target, ctx in ctx_list():
+ for target, ctx in enabled_targets():
with tvm.transform.PassContext(opt_level=3):
graph, lib, params = relay.build_module.build(mod, target=target, params=params)
m = tvm.contrib.graph_runtime.create(graph, lib, ctx)
return ret
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_llvm(index_a, index_b):
n = te.size_var("n")
c = tvm.nd.array(np.zeros(1024, dtype=C.dtype), ctx)
fadd (a, b, c)
+@tvm.testing.requires_llvm
def test_in_bounds_llvm():
n = te.size_var("n")
A = te.placeholder ((n,), name='A')
c = tvm.nd.array(np.zeros(1024, dtype=C.dtype), ctx)
fadd (a, b, c)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_vectorize_llvm(nn, index_a, index_b):
n = tvm.runtime.convert(nn)
c = tvm.nd.array(np.zeros(n, dtype=c.dtype), ctx)
f(a, b, c)
+@tvm.testing.requires_llvm
def test_in_bounds_vectorize_llvm():
n = 512
lanes = 2
f(a, c)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
+@tvm.testing.requires_llvm
def test_in_bounds_loop_partition_basic_llvm():
n = te.size_var('n')
A = te.placeholder((n, ), name='A')
t = tvm.nd.empty((32,), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_loop_partition_basic_llvm(index_a, index_b):
n = te.size_var('n')
assert(len(branch_collector) == 2)
+@tvm.testing.requires_llvm
def test_in_bounds_const_loop_partition_llvm():
with tvm.transform.PassContext(config={
"tir.instrument_bound_checkers": True,
t = tvm.nd.empty((n,), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_const_loop_partition_llvm(index_a, index_b):
with tvm.transform.PassContext(config={
t = tvm.nd.empty((n,), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
def test_in_bounds_conv_llvm(loop_tiling=False):
HSTR = WSTR = 1
in_channel = 128
conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx)
f(data_input, kernel_input, conv_out)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_conv_llvm(data_offsets, kernel_offsets, loop_tiling=False):
HSTR = WSTR = 1
conv_out = tvm.nd.empty ((batch_size, out_channel, out_height, out_width), "float32", ctx)
f(data_input, kernel_input, conv_out)
+@tvm.testing.requires_llvm
def test_in_bounds_tensors_with_same_shapes1D_llvm():
n = te.size_var('n')
k = te.size_var('k')
t = tvm.nd.empty((32,), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_tensors_with_diff_shapes1D_llvm(a_shape, b_shape, c_shape):
n = te.size_var('n')
t = tvm.nd.empty((c_shape,), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
def test_in_bounds_tensors_with_same_shapes2D_llvm():
n = te.size_var('n')
k = te.size_var('k')
t = tvm.nd.empty((32, 32), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_tensors_with_diff_shapes2D_llvm(a_shape, b_shape, c_shape):
n = te.size_var('n')
t = tvm.nd.empty((c_shape[0],c_shape[1]), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
def test_in_bounds_tensors_with_same_shapes3D_llvm():
n = te.size_var('n')
k = te.size_var('k')
t = tvm.nd.empty((32, 32, 32), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_tensors_with_diff_shapes3D_llvm(a_shape, b_shape, c_shape):
n = te.size_var('n')
t = tvm.nd.empty((c_shape[0],c_shape[1],c_shape[2]), T.dtype, ctx)
f(a, b, t)
+@tvm.testing.requires_llvm
@pytest.mark.xfail
def test_out_of_bounds_tensors_with_zero_shape_op_with_not_zero_shape_llvm():
- if not tvm.runtime.enabled("llvm"):
- return
n = 64
A = te.placeholder((n, ), name='A')
scale = te.placeholder((), name='scale')
# specific language governing permissions and limitations
# under the License.
import tvm
+import tvm.testing
from tvm import te
import numpy as np
C = te.compute((n,), make_binds)
s = te.create_schedule([C.op])
- if not tvm.runtime.enabled("llvm"):
- return
-
f = tvm.build(s, [A, B, C], "llvm")
a = tvm.nd.array(np.array([x for x, y in data], dtype=expr.dtype))
b = tvm.nd.array(np.array([y for x, y in data], dtype=expr.dtype))
return list(itertools.product(x, y))
+@tvm.testing.requires_llvm
def test_lower_floordiv():
data = get_ref_data()
for dtype in ["int32", "int64", "int16"]:
check_value(res, x, y, [(a, b) for a, b in data if b == 8], lambda a, b: a // b)
+@tvm.testing.requires_llvm
def test_lower_floormod():
data = get_ref_data()
for dtype in ["int32", "int64", "int16"]:
from tvm.contrib.nvcc import have_fp16
import numpy as np
+import tvm.testing
+@tvm.testing.requires_cuda
def test_lower_warp_memory_local_scope():
m = 128
A = te.placeholder((m,), name='A')
assert(fdevice.body.body.value.value == "local")
assert(fdevice.body.body.body.extents[0].value == 2)
+@tvm.testing.requires_cuda
def test_lower_warp_memory_correct_indices():
n = 32
A = te.placeholder((2, n, n), name='A', dtype="float32")
assert "threadIdx.x" in idx_names
assert "threadIdx.y" not in idx_names
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_lower_warp_memory_cuda_end_to_end():
def check_cuda(dtype):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because gpu does not have fp16 support")
return
check_cuda("float32")
check_cuda("float16")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_lower_warp_memory_cuda_half_a_warp():
def check_cuda(dtype):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because gpu does not have fp16 support")
return
check_cuda("float32")
check_cuda("float16")
+@tvm.testing.requires_gpu
+@tvm.testing.requires_cuda
def test_lower_warp_memory_cuda_2_buffers():
def check_cuda(dtype):
- if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
- print("skip because cuda is not enabled..")
- return
if dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
print("Skip because gpu does not have fp16 support")
return
check_cuda("float32")
check_cuda("float16")
+@tvm.testing.requires_gpu
def test_lower_warp_memory_roundup():
def check(device, m):
A = te.placeholder((m,), name='A')
tvm.testing.assert_allclose(B_nd.asnumpy(), B_np)
for device in ['cuda', 'rocm']:
- if not tvm.context(device, 0).exist or not tvm.runtime.enabled(device):
+ if not tvm.testing.device_enabled(device):
print("skip because", device,"is not enabled..")
continue
check(device, m=31)
# under the License.
import tvm
from tvm import te
+import tvm.testing
+@tvm.testing.requires_cuda
def test_thread_storage_sync():
m = te.size_var('m')
l = te.size_var('l')
set +u
if [[ ! -z $CI_PYTEST_ADD_OPTIONS ]]; then
- export PYTEST_ADDOPTS="-v $CI_PYTEST_ADD_OPTIONS"
+ export PYTEST_ADDOPTS="-v $CI_PYTEST_ADD_OPTIONS $PYTEST_ADDOPTS"
else
- export PYTEST_ADDOPTS="-v "
+ export PYTEST_ADDOPTS="-v $PYTEST_ADDOPTS"
fi
set -u
export TVM_BIND_THREADS=0
export OMP_NUM_THREADS=1
+export TVM_TEST_TARGETS="llvm;cuda"
+
find . -type f -path "*.pyc" | xargs rm -f
# Rebuild cython
export TVM_BIND_THREADS=0
export OMP_NUM_THREADS=1
+export TVM_TEST_TARGETS="llvm"
+
find . -type f -path "*.pyc" | xargs rm -f
# Rebuild cython
TVM_FFI=ctypes python3 -m pytest tests/python/integration
TVM_FFI=ctypes python3 -m pytest tests/python/contrib
-TVM_FFI=ctypes python3 -m pytest tests/python/relay
+TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" TVM_FFI=ctypes python3 -m pytest tests/python/relay
# Do not enable OpenGL
# TVM_FFI=cython python -m pytest tests/webgl
# specific language governing permissions and limitations
# under the License.
+export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;vulkan;nvptx;opencl -device=mali,aocl_sw_emu"
+export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS"
+export TVM_RELAY_TEST_TARGETS="cuda"
+
./tests/scripts/task_python_integration.sh
# specific language governing permissions and limitations
# under the License.
+export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;vulkan;nvptx;opencl -device=mali,aocl_sw_emu"
+export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS"
+
./tests/scripts/task_python_unittest.sh
from tvm import te
from matplotlib import pyplot as plt
-from tvm.relay.testing.config import ctx_list
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.contrib.download import download_testdata
model_name = supported_model[0]
dshape = (1, 3, 512, 512)
-target_list = ctx_list()
######################################################################
# Download and pre-process demo image
class_IDs, scores, bounding_boxs = m.get_output(0), m.get_output(1), m.get_output(2)
return class_IDs, scores, bounding_boxs
-for target, ctx in target_list:
- lib = build(target)
- class_IDs, scores, bounding_boxs = run(lib, ctx)
+for target in ["llvm", "cuda"]:
+ ctx = tvm.context(target, 0)
+ if ctx.exist:
+ lib = build(target)
+ class_IDs, scores, bounding_boxs = run(lib, ctx)
######################################################################
# Display result