Software Emulation
~~~~~~~~~~~~~~~~~~
Both software emulation and compilation are supported. To define a function,
-you need to use ``tvm.hybrid.script`` decorator to indicate this is a hybrid function:
+you need to use the ``tvm.te.hybrid.script`` decorator to indicate this is a hybrid function:
.. code-block:: python
- @tvm.hybrid.script
+ @tvm.te.hybrid.script
def outer_product(a, b, c):
c = output_tensor((100, 99), 'float32')
for i in range(a.shape[0]):
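A complete version of this example might look like the following sketch. The loop body, the ``return`` statement, and the NumPy driver code are reconstructions of the usual outer-product pattern rather than verbatim lines from this excerpt, and the two-argument signature is an assumption:

.. code-block:: python

    import numpy
    import tvm

    @tvm.te.hybrid.script
    def outer_product(a, b):
        # allocate the output buffer of the hybrid op
        c = output_tensor((100, 99), 'float32')
        for i in range(a.shape[0]):
            for j in range(b.shape[0]):
                c[i, j] = a[i] * b[j]   # classic outer product
        return c

    # software emulation: call the decorated function directly on numpy arrays
    a = numpy.random.randn(100)
    b = numpy.random.randn(99)
    c = outer_product(a, b)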
Tuning
~~~~~~
-Follow up the example above, you can use some tvm like interfaces to tune the code:
+Following the example above, you can use some TVM-like interfaces to tune the code:
.. code-block:: python
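    # A hedged sketch of the kind of tuning the text refers to; the exact calls
    # are not shown in this excerpt. It assumes the two-argument outer_product
    # sketch above and standard TE schedule primitives on the resulting hybrid op.
    from tvm import te

    a = te.placeholder((100,), name='a')
    b = te.placeholder((99,), name='b')
    c = outer_product(a, b)               # output tensor of the hybrid op
    sch = te.create_schedule(c.op)
    i, j = c.op.axis                      # loop axes exposed by the hybrid op
    jo, ji = sch[c].split(j, factor=4)    # split the inner loop by 4
    sch[c].vectorize(ji)                  # vectorize the new inner axis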
This is a preliminary feature, so users are responsible for the correctness
of the functionality after tuning. Specifically, users should be careful when
- fusing and reorderding imperfect loops.
+ fusing and reordering imperfect loops.
Loops
~~~~~
.. code-block:: python
- @tvm.hybrid.script
+ @tvm.te.hybrid.script
def foo(a, b): # b is a tvm.container.Array
c = output_tensor(a.shape, a.dtype)
for i in const_range(len(a)): # because b is accessed, i must be explicitly annotated as const_range
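        # hedged completion of the loop body (not shown in this excerpt); b is a
        # python list/Array captured at compile time, so it is indexed with the
        # constant loop variable i
        c[i] = a[i] + b[i]
    return c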
from tvm import te
from tvm.contrib import util
-from tvm.hybrid import script
-from tvm.hybrid.runtime import HYBRID_GLOBALS
+from tvm.te.hybrid import script
+from tvm.te.hybrid.runtime import HYBRID_GLOBALS
@pytest.mark.skip
def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None):
module_args = [i for i in args if isinstance(i, (te.tensor.Tensor, tvm.tir.Var))]
module_outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
- h_module = tvm.hybrid.build(sch, module_args, module_outs)
+ h_module = te.hybrid.build(sch, module_args, module_outs)
return h_module, module_args, module_outs
temp = util.tempdir()
path = temp.relpath('%s.py' % func.name)
func.save(path)
- func_ = tvm.hybrid.HybridModule()
+ func_ = te.hybrid.HybridModule()
func_.load(path)
run_and_check(func_, ins, {n: 99, m: 101}, outs=outs)
run_and_check(func, ins, outs=outs, target='cuda')
- @tvm.hybrid.script
+ @te.hybrid.script
def foo(a):
c = output_tensor((a.shape[0],), a.dtype)
total = allocate((1,), a.dtype, 'local')
func, ins, outs = run_and_check(foo, [a], target='cuda')
run_and_check(func, ins, outs=outs, target='cuda')
- @tvm.hybrid.script
+ @te.hybrid.script
def max_threads(a):
b = output_tensor(a.shape, a.dtype)
n = a.shape[0]
# test non-canonical loops
def test_non_zero():
- @tvm.hybrid.script
+ @te.hybrid.script
def blur(a):
b = output_tensor((30, 30), 'float32')
for i in range(2, 32):
func, ins, outs = run_and_check(blur, [a])
run_and_check(func, ins, outs=outs)
- @tvm.hybrid.script
+ @te.hybrid.script
def triangle(a, b):
c = output_tensor((10, 10), dtype='float32')
for i in range(10):
run_and_check(func, ins, outs=outs)
def test_allocate():
- @tvm.hybrid.script
+ @te.hybrid.script
def blur2d(a):
b = output_tensor((30, 30), 'float32')
for i in range(30):
run_and_check(func, ins, outs=outs)
if tvm.gpu().exist:
- @tvm.hybrid.script
+ @te.hybrid.script
def share_vec_add(a, b):
c = output_tensor((256, ), 'float32')
shared = allocate((256, ), 'float32', 'shared')
print('[Warning] No GPU found! Skip shared mem test!')
def test_upstream():
- @tvm.hybrid.script
+ @te.hybrid.script
def upstream(a):
b = output_tensor((20, ), 'float32')
for i in range(20):
tvm.testing.assert_allclose(tvm_d.asnumpy(), ref, 1e-5, 1e-5)
def test_downstream():
- @tvm.hybrid.script
+ @te.hybrid.script
def downstream(a):
b = output_tensor((20, ), 'float32')
for i in range(20):
tvm.testing.assert_allclose(tvm_c.asnumpy(), ref, 1e-5, 1e-5)
def test_const_param():
- @tvm.hybrid.script
+ @te.hybrid.script
def add_something(a, b):
c = output_tensor((11, ), 'int32')
for i in range(11):
tvm.testing.assert_allclose(nd_c.asnumpy(), ref, 1e-5, 1e-5)
def test_value_index():
- @tvm.hybrid.script
+ @te.hybrid.script
def kernel_a(a):
b = output_tensor((16, ), 'int32')
c = output_tensor((4, 4), 'int32')
c[i // 4, i % 4] = a[i] + 1
return b, c
- @tvm.hybrid.script
+ @te.hybrid.script
def kernel_b(b, a):
c = output_tensor((4, 4), 'int32')
for i in range(4):
tvm.testing.assert_allclose(res.asnumpy(), ref)
def test_func_call():
- @tvm.hybrid.script
+ @te.hybrid.script
def foo(a, b):
for i in range(len(a)):
a[i] = i + 1.0
run_and_check(func, ins, outs=outs)
def test_bool():
- @tvm.hybrid.script
+ @te.hybrid.script
def foo(a):
b = output_tensor(a.shape, a.dtype)
b[0] = 1.2
run_and_check(func, ins, outs=outs)
def test_const_range():
- @tvm.hybrid.script
+ @te.hybrid.script
def foo(a, b):
c = output_tensor(a.shape, a.dtype)
d = output_tensor(a.shape, 'int32')
func, ins, outs = run_and_check(foo, [a, b])
run_and_check(func, ins, outs=outs)
- @tvm.hybrid.script
+ @te.hybrid.script
def goo(a, b):
c = output_tensor(a.shape, a.dtype)
len_b = len(b)
func, ins, outs = run_and_check(goo, [a, b])
run_and_check(func, ins, outs=outs)
- @tvm.hybrid.script
+ @te.hybrid.script
def hoo(a, b):
c = output_tensor(a.shape, a.dtype)
len_b = len(b)
constant_list = [[1, 2], [3, n]]
const_value = 1
- @tvm.hybrid.script
+ @te.hybrid.script
def add_something(a):
c = output_tensor((constant_tuple[1],), 'int32')
for i in range(constant_tuple[1]):